diff --git a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out index de82857..f92fcf1 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_queries.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.accumulo_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE accumulo_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -501,9 +535,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -518,11 +554,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.accumulo_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -604,6 +647,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.accumulo_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE accumulo_table_3 SELECT x.key, x.value, Y.count diff --git a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out index 6621a4e..50a7580 100644 --- a/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out +++ b/accumulo-handler/src/test/results/positive/accumulo_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 
@@ STAGE PLANS: output format: org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat serde: org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 420d35e..8e0ecf0 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1659,7 +1659,7 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal // Statistics HIVESTATSAUTOGATHER("hive.stats.autogather", true, "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), - HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, + HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", true, "A flag to gather column statistics automatically."), HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), "The storage that stores temporary Hive statistics. 
In filesystem based statistics collection ('fs'), \n" + diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out index 6876ca8..b02bc17 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out index 79cf8fe..db70573 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes2.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: smallint, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out index fec58ef..40c2dbc 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes3.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -68,6 +69,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -91,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out index 1131478..c75fa3b 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes4.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: 
Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(cast(src.key as tinyint), src.value) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.TypedBytesSerDe' diff --git a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out index 8d3b95e..427286a 100644 --- a/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out +++ b/contrib/src/test/results/clientpositive/serde_typedbytes5.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats 
Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml index 62364fe..4985803 100644 --- a/data/conf/hive-site.xml +++ b/data/conf/hive-site.xml @@ -302,12 +302,15 @@ true - hive.llap.io.allocator.direct false + + hive.stats.column.autogather + true + hive.materializedview.rewriting diff --git a/hbase-handler/src/test/results/positive/hbase_ddl.q.out b/hbase-handler/src/test/results/positive/hbase_ddl.q.out index 8cb88ed..9be51ae 100644 --- a/hbase-handler/src/test/results/positive/hbase_ddl.q.out +++ b/hbase-handler/src/test/results/positive/hbase_ddl.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.hbase_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -117,6 +151,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} hbase.mapred.output.outputtable kkk hbase.table.name hbase_table_0 #### A masked pattern was here #### @@ -164,6 +199,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} hbase.table.name hbase_table_0 #### A masked pattern was here #### numFiles 0 diff --git a/hbase-handler/src/test/results/positive/hbase_queries.q.out b/hbase-handler/src/test/results/positive/hbase_queries.q.out index 276b6b8..035e62e 100644 --- a/hbase-handler/src/test/results/positive/hbase_queries.q.out +++ b/hbase-handler/src/test/results/positive/hbase_queries.q.out @@ -41,6 +41,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: 
Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -57,6 +58,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.hbase_table_1 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -82,6 +90,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: FROM src INSERT OVERWRITE TABLE hbase_table_1 SELECT * WHERE (key%2)=0 PREHOOK: type: QUERY @@ -504,9 +538,11 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-6 depends on stages: Stage-2, Stage-1, Stage-4 Stage-1 is a root stage - Stage-4 is a root stage - Stage-3 depends on stages: Stage-4 + Stage-5 is a root stage + Stage-3 depends on stages: Stage-5 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-0 @@ -521,11 +557,18 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, count + Column Types: int, string, int + Table: default.hbase_table_3 + Stage: Stage-1 Pre Insert operator: Pre-Insert task - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -607,6 +650,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.hbase_table_3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, count + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: 
+ Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE hbase_table_3 SELECT x.key, x.value, Y.count diff --git a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out index 68a417d..a8ba3f6 100644 --- a/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out +++ b/hbase-handler/src/test/results/positive/hbase_single_sourced_multi_insert.q.out @@ -35,6 +35,8 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-12 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 + Stage-13 depends on stages: Stage-2, Stage-1, Stage-5, Stage-11 Stage-1 is a root stage Stage-4 is a root stage Stage-10 depends on stages: Stage-4 , consists of Stage-7, Stage-6, Stage-8 @@ -44,6 +46,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-0 @@ -58,6 +61,20 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_x2 + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -83,6 +100,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_x1 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 50) and (key < 100)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +128,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.src_x2 + Select Operator + expressions: _col0 (type: string), '' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -151,6 +209,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src a insert overwrite table src_x1 select key,"" where a.key > 0 AND a.key < 50 diff --git a/hbase-handler/src/test/results/positive/hbasestats.q.out b/hbase-handler/src/test/results/positive/hbasestats.q.out index bf902e4..9909342 100644 --- a/hbase-handler/src/test/results/positive/hbasestats.q.out +++ b/hbase-handler/src/test/results/positive/hbasestats.q.out @@ -64,6 +64,7 @@ POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-0 is a root stage Stage-2 + Stage-4 depends on stages: Stage-2, Stage-1, Stage-3 Stage-1 is a root stage Stage-3 is a root stage @@ -80,6 +81,13 @@ STAGE PLANS: Insert operator: Insert + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, state, country, country_id + Column Types: string, string, string, int + Table: default.users + Stage: Stage-1 Pre Insert operator: Pre-Insert task @@ -102,6 +110,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.hbase.HiveHBaseTableOutputFormat serde: org.apache.hadoop.hive.hbase.HBaseSerDe name: default.users + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: int) + outputColumnNames: key, state, country, country_id + Statistics: Num rows: 500 Data size: 133000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(state, 16), compute_stats(country, 16), compute_stats(country_id, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 1952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE users SELECT 'user1', 'IA', 'USA', 0 FROM src PREHOOK: type: QUERY @@ -131,6 +165,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -196,6 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 0 @@ -252,7 +288,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 @@ -323,6 +359,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"country\":\"true\",\"country_id\":\"true\",\"key\":\"true\",\"state\":\"true\"}} #### A masked pattern was here #### numFiles 0 numRows 2 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out index e55b1c2..59b2397 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_dynamic_partitions.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -211,6 +212,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false + PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out index 4ed53e5..604197b 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_into_table.q.out @@ -43,6 +43,7 @@ 
STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-2 depends on stages: Stage-0, Stage-4 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-0 depends on stages: Stage-3, Stage-6 Stage-5 @@ -71,6 +72,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -90,6 +92,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -129,6 +147,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -142,6 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -163,6 +211,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -177,6 +233,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -205,6 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns 
id @@ -224,6 +282,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -253,6 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -284,6 +344,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -312,6 +373,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -331,6 +393,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out index 6e95fd1..17db9db 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_directory.q.out @@ -107,7 +107,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -129,7 +129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -358,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key @@ -380,7 +380,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns id,key diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out index 660cebb..3894ec4 100644 --- 
a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_dynamic_partitions.q.out @@ -96,6 +96,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -229,6 +230,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: false + PREHOOK: query: DROP TABLE table1 PREHOOK: type: DROPTABLE PREHOOK: Input: default@table1 diff --git a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out index 8052fd8..d29bd60 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out +++ b/itests/hive-blobstore/src/test/results/clientpositive/insert_overwrite_table.q.out @@ -51,6 +51,7 @@ STAGE DEPENDENCIES: Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 Stage-2 depends on stages: Stage-0, Stage-4 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-0 depends on stages: Stage-3, Stage-6 Stage-5 @@ -79,7 +80,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -101,6 +102,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: id + Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(id, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -140,6 +157,35 @@ STAGE PLANS: name: default.values__tmp__table__3 Truncated Path -> Alias: #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 
+ GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -153,7 +199,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -177,6 +223,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: id + Column Types: int + Table: default.table1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -191,7 +245,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -222,7 +276,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -244,7 +298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -276,7 +330,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -310,7 +364,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -341,7 +395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id @@ -363,7 +417,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"id":"true"}} bucket_count -1 column.name.delimiter , columns id diff --git a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out index 8a90a9e..6b82c7e 100644 --- a/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out +++ 
b/itests/hive-blobstore/src/test/results/clientpositive/write_final_output_blobstore.q.out @@ -33,6 +33,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -209,6 +210,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -238,6 +274,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true + PREHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED FROM hdfs_table INSERT OVERWRITE TABLE blobstore_table SELECT hdfs_table.key GROUP BY hdfs_table.key ORDER BY hdfs_table.key @@ -247,6 +291,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -423,6 +468,41 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -452,6 +532,14 @@ STAGE PLANS: Stats-Aggr Operator Stats Aggregation Key Prefix: ### BLOBSTORE_STAGING_PATH ### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.blobstore_table + Is Table Level Stats: true + PREHOOK: query: DROP TABLE hdfs_table PREHOOK: type: DROPTABLE PREHOOK: Input: default@hdfs_table diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java index ad2baa2..e8ef4b9 100644 --- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java +++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/TestMTQueries.java @@ -44,6 +44,7 @@ public void testMTQueries1() throws Exception { util.getConf().setBoolean("hive.exec.submit.local.task.via.child", true); util.getConf().set("hive.stats.dbclass", "fs"); util.getConf().set("hive.mapred.mode", "nonstrict"); + util.getConf().set("hive.stats.column.autogather", "false"); } boolean success = QTestUtil.queryListRunnerMultiThreaded(qfiles, qts); if (!success) { diff --git a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java index fc2cb08..eadd46e 100644 --- a/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java +++ b/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java @@ -197,6 +197,7 @@ private static void restoreMiniHS2AndConnections() throws Exception { private static void startMiniHS2(HiveConf conf) throws Exception { conf.setBoolVar(ConfVars.HIVE_SUPPORT_CONCURRENCY, false); conf.setBoolVar(ConfVars.HIVE_SERVER2_LOGGING_OPERATION_ENABLED, false); + conf.setBoolVar(ConfVars.HIVESTATSCOLAUTOGATHER, false); miniHS2 = new MiniHS2.Builder().withConf(conf).cleanupLocalDirOnStartup(false).build(); Map confOverlay = new HashMap(); miniHS2.start(confOverlay); diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java index 6dae7ba..f2a8ef9 100644 --- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java +++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java @@ -110,6 +110,7 @@ public void setUp() throws Exception { .setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory"); hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true); + hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false); TxnDbUtil.setConfValues(hiveConf); TxnDbUtil.prepDb(); File f = new File(TEST_WAREHOUSE_DIR); diff --git a/ql/src/test/queries/clientpositive/bucket_num_reducers.q b/ql/src/test/queries/clientpositive/bucket_num_reducers.q index 06f334e..5c5008e 100644 --- a/ql/src/test/queries/clientpositive/bucket_num_reducers.q +++ b/ql/src/test/queries/clientpositive/bucket_num_reducers.q @@ -1,4 +1,4 @@ -; +set hive.stats.column.autogather=false; set hive.exec.mode.local.auto=false; set mapred.reduce.tasks = 10; diff --git a/ql/src/test/queries/clientpositive/combine1.q 
b/ql/src/test/queries/clientpositive/combine1.q index 3bcb8b1..b300830 100644 --- a/ql/src/test/queries/clientpositive/combine1.q +++ b/ql/src/test/queries/clientpositive/combine1.q @@ -7,6 +7,8 @@ set mapred.max.split.size=256; set mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec; +set hive.stats.column.autogather=false; + -- SORT_QUERY_RESULTS create table combine1_1(key string, value string) stored as textfile; diff --git a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q index 4dcea1f..7159ad5 100644 --- a/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q +++ b/ql/src/test/queries/clientpositive/encryption_join_with_different_encryption_keys.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; --SORT_QUERY_RESULTS -- Java JCE must be installed in order to have a key length of 256 bits diff --git a/ql/src/test/queries/clientpositive/groupby1.q b/ql/src/test/queries/clientpositive/groupby1.q index a8c9a8d..cd3a12b 100755 --- a/ql/src/test/queries/clientpositive/groupby1.q +++ b/ql/src/test/queries/clientpositive/groupby1.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.explain.user=false; set hive.map.aggr=false; diff --git a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q index 6824c1c..c0ddb8b 100644 --- a/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q +++ b/ql/src/test/queries/clientpositive/infer_bucket_sort_reducers_power_two.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; set hive.exec.infer.bucket.sort=true; set hive.exec.infer.bucket.sort.num.buckets.power.two=true; diff --git a/ql/src/test/queries/clientpositive/join2.q b/ql/src/test/queries/clientpositive/join2.q index 8aedd56..c3c7c24 100644 --- a/ql/src/test/queries/clientpositive/join2.q +++ b/ql/src/test/queries/clientpositive/join2.q @@ -1,3 +1,5 @@ +-- due to testMTQueries1 +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; -- SORT_QUERY_RESULTS diff --git a/ql/src/test/queries/clientpositive/orc_wide_table.q b/ql/src/test/queries/clientpositive/orc_wide_table.q index 422a3c2..d2ec385 100644 --- a/ql/src/test/queries/clientpositive/orc_wide_table.q +++ b/ql/src/test/queries/clientpositive/orc_wide_table.q @@ -1,4 +1,5 @@ set hive.mapred.mode=nonstrict; +set hive.stats.column.autogather=false; drop table if exists test_txt; drop table if exists test_orc; create table test_txt( diff --git a/ql/src/test/queries/clientpositive/partition_coltype_literals.q b/ql/src/test/queries/clientpositive/partition_coltype_literals.q index eb56b1a..8da4876 100644 --- a/ql/src/test/queries/clientpositive/partition_coltype_literals.q +++ b/ql/src/test/queries/clientpositive/partition_coltype_literals.q @@ -1,3 +1,4 @@ +set hive.stats.column.autogather=false; set hive.compute.query.using.stats=false; drop table if exists partcoltypenum; create table partcoltypenum (key int, value string) partitioned by (tint tinyint, sint smallint, bint bigint); diff --git a/ql/src/test/queries/clientpositive/smb_join_partition_key.q b/ql/src/test/queries/clientpositive/smb_join_partition_key.q index 160bf5e..b8a8323 100644 --- a/ql/src/test/queries/clientpositive/smb_join_partition_key.q +++
b/ql/src/test/queries/clientpositive/smb_join_partition_key.q @@ -1,3 +1,5 @@ +--because p1 is decimal +set hive.stats.column.autogather=false; set hive.mapred.mode=nonstrict; SET hive.enforce.sortmergebucketmapjoin=false; SET hive.auto.convert.sortmerge.join=true; diff --git a/ql/src/test/queries/clientpositive/udf_round_2.q b/ql/src/test/queries/clientpositive/udf_round_2.q index 43988c1..38885a9 100644 --- a/ql/src/test/queries/clientpositive/udf_round_2.q +++ b/ql/src/test/queries/clientpositive/udf_round_2.q @@ -1,4 +1,5 @@ set hive.fetch.task.conversion=more; +set hive.stats.column.autogather=false; -- test for NaN (not-a-number) create table tstTbl1(n double); diff --git a/ql/src/test/results/clientnegative/fileformat_void_input.q.out b/ql/src/test/results/clientnegative/fileformat_void_input.q.out index 6043258..538e8f4 100644 --- a/ql/src/test/results/clientnegative/fileformat_void_input.q.out +++ b/ql/src/test/results/clientnegative/fileformat_void_input.q.out @@ -10,16 +10,4 @@ POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@dest1 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@dest1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 10 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@dest1 -POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: dest1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -FAILED: SemanticException 3:20 Input format must implement InputFormat. Error encountered near token 'dest1' +FAILED: SemanticException 1:70 Input format must implement InputFormat. 
Error encountered near token 'dest1' diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out index 12dcc11..fc61bc6 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -178,7 +178,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -277,7 +277,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -376,7 +376,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -475,7 +475,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -574,7 +574,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -673,7 +673,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -772,7 +772,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 5312 @@ -871,7 +871,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out index 944482c..5b9ba69 100644 --- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out +++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out @@ -88,7 +88,7 
@@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 8 numRows 500 rawDataSize 5312 @@ -145,7 +145,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -275,7 +275,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 12 numRows 500 rawDataSize 5312 @@ -364,7 +364,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here #### numFiles 12 numRows 500 diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out index 426a4de..6173125 100644 --- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out +++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out @@ -232,7 +232,7 @@ STAGE PLANS: dt 100 ts 3.0 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -278,7 +278,7 @@ STAGE PLANS: dt 100 ts 6.30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -469,7 +469,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -538,7 +538,7 @@ STAGE PLANS: partcol1 2 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out index ae6624f..5c7c809 100644 --- a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out +++ b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out @@ -199,7 +199,7 @@ Database: default Table: mp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 diff --git a/ql/src/test/results/clientpositive/alter_table_serde2.q.out b/ql/src/test/results/clientpositive/alter_table_serde2.q.out index ae0ef54..324859d 100644 --- a/ql/src/test/results/clientpositive/alter_table_serde2.q.out +++ b/ql/src/test/results/clientpositive/alter_table_serde2.q.out @@ -79,7 +79,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition 
Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -179,7 +179,7 @@ Database: default Table: tst1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out index 2bfc04a..3a47edb 100644 --- a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out +++ b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: age 15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -147,7 +147,7 @@ STAGE PLANS: partition values: age 30 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -192,7 +192,7 @@ STAGE PLANS: partition values: age 40 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name @@ -237,7 +237,7 @@ STAGE PLANS: partition values: age __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}} bucket_count -1 column.name.delimiter , columns name diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out index e22c3ef..bcb1dd2 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain select * from loc_orc where state='OH' @@ -87,17 +87,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (state = 'OH') (type: boolean) - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 
102 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out index a8e4854..150e7f8 100644 --- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1) keys: _col0 (type: string), _col2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + 
Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -743,30 +743,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/auto_join1.q.out b/ql/src/test/results/clientpositive/auto_join1.q.out index 5f4bb74..dbd49a5 100644 --- a/ql/src/test/results/clientpositive/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/auto_join1.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 
(type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out index 1dd677c..affad1e 100644 --- a/ql/src/test/results/clientpositive/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/auto_join14.q.out @@ -15,13 +15,15 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 
3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join17.q.out b/ql/src/test/results/clientpositive/auto_join17.q.out index d39c36e..87158b7 100644 --- a/ql/src/test/results/clientpositive/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out index 3f70055..d492a03 100644 --- a/ql/src/test/results/clientpositive/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/auto_join19.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 
Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out index 3f70055..d492a03 100644 --- a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out +++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out @@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out index b17d344..0c3fa26 100644 --- a/ql/src/test/results/clientpositive/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/auto_join2.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -62,7 +64,7 @@ STAGE PLANS: 0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double) 1 UDFToDouble(_col0) (type: double) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -103,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -119,6 +136,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join25.q.out b/ql/src/test/results/clientpositive/auto_join25.q.out index 534bdb6..b8c7123 100644 --- a/ql/src/test/results/clientpositive/auto_join25.q.out +++ b/ql/src/test/results/clientpositive/auto_join25.q.out @@ -18,8 +18,6 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -33,11 +31,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 POSTHOOK: Output: default@dest1 POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -64,8 +64,6 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest_j2 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value POSTHOOK: type: QUERY @@ -73,14 +71,16 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j2 POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j2 @@ -105,8 +105,6 @@ INSERT OVERWRITE 
TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest_j1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY @@ -114,11 +112,13 @@ POSTHOOK: Input: default@src POSTHOOK: Output: default@dest_j1 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1 PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out index b05145d..50340cd 100644 --- a/ql/src/test/results/clientpositive/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/auto_join26.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out index 35e8273..d230ae9 100644 --- a/ql/src/test/results/clientpositive/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/auto_join3.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:src1 @@ -64,7 +66,7 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT 
src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/auto_join4.q.out b/ql/src/test/results/clientpositive/auto_join4.q.out index eacf056..a24687d 100644 --- a/ql/src/test/results/clientpositive/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/auto_join4.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src2 @@ -66,7 +68,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -99,6 +101,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out index d526595..8df09ca 100644 --- a/ql/src/test/results/clientpositive/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/auto_join5.q.out @@ -37,13 +37,15 @@ FROM ( INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
   Stage-2 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-5
 
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_0:src1 
@@ -66,7 +68,7 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
 
-  Stage: Stage-4
+  Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -99,6 +101,21 @@ STAGE PLANS:
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+              outputColumnNames: c1, c2, c3, c4
+              Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -115,6 +132,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (
  FROM (
diff --git a/ql/src/test/results/clientpositive/auto_join6.q.out b/ql/src/test/results/clientpositive/auto_join6.q.out
index 53caf7d..d661908 100644
--- a/ql/src/test/results/clientpositive/auto_join6.q.out
+++ b/ql/src/test/results/clientpositive/auto_join6.q.out
@@ -40,6 +40,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -98,6 +100,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+              outputColumnNames: c1, c2, c3, c4
+              Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -112,6 +129,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (
  FROM (
diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out
index aec4e82..8a23e3d 100644
--- a/ql/src/test/results/clientpositive/auto_join7.q.out
+++ b/ql/src/test/results/clientpositive/auto_join7.q.out
@@ -50,6 +50,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -126,6 +128,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string)
+              outputColumnNames: c1, c2, c3, c4, c5, c6
+              Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -140,6 +157,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4, c5, c6
+          Column Types: int, string, int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (
  FROM (
diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out
index ccbafba..8f3a02b 100644
--- a/ql/src/test/results/clientpositive/auto_join8.q.out
+++ b/ql/src/test/results/clientpositive/auto_join8.q.out
@@ -37,13 +37,15 @@ FROM (
 INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
   Stage-2 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-5
 
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_1:src2 
@@ -66,7 +68,7 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
 
-  Stage: Stage-4
+  Stage: Stage-5
     Map Reduce
      Map Operator Tree:
          TableScan
@@ -102,6 +104,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+              outputColumnNames: c1, c2, c3, c4
+              Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -118,6 +135,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, c2, c3, c4
+          Column Types: int, string, int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (
  FROM (
diff --git a/ql/src/test/results/clientpositive/auto_join9.q.out b/ql/src/test/results/clientpositive/auto_join9.q.out
index d7d7d18..6474ef0 100644
--- a/ql/src/test/results/clientpositive/auto_join9.q.out
+++ b/ql/src/test/results/clientpositive/auto_join9.q.out
@@ -15,13 +15,15 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-5 is a root stage
-  Stage-4 depends on stages: Stage-5
-  Stage-0 depends on stages: Stage-4
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-5
   Stage-2 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-5
 
 STAGE PLANS:
-  Stage: Stage-5
+  Stage: Stage-6
     Map Reduce Local Work
       Alias -> Map Local Tables:
         $hdt$_0:src1 
@@ -44,7 +46,7 @@ STAGE PLANS:
             0 _col0 (type: string)
             1 _col0 (type: string)
 
-  Stage: Stage-4
+  Stage: Stage-5
     Map Reduce
      Map Operator Tree:
          TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Local Work:
         Map Reduce Local Work
 
@@ -93,6 +110,35 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
index 156be41..a7508c2 100644
--- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
@@ -141,7 +141,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -162,7 +162,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -182,7 +182,7 @@ STAGE PLANS:
             name: default.orderpayment_small
           name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_0:orderpayment, $hdt$_1:dim_pay_date]
+        /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date]
       Needs Tagging: true
       Reduce Operator Tree:
         Join Operator
@@ -277,7 +277,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -298,7 +298,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -318,7 +318,7 @@ STAGE PLANS:
             name: default.orderpayment_small
           name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_2:deal]
+        /orderpayment_small [$hdt$_3:deal]
#### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -414,7 +414,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -435,7 +435,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns dealid,date,time,cityid,userid
@@ -455,7 +455,7 @@ STAGE PLANS:
             name: default.orderpayment_small
           name: default.orderpayment_small
       Truncated Path -> Alias:
-        /orderpayment_small [$hdt$_3:order_city]
+        /orderpayment_small [$hdt$_4:order_city]
#### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
@@ -551,7 +551,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns userid
@@ -572,7 +572,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns userid
@@ -592,7 +592,7 @@ STAGE PLANS:
             name: default.user_small
           name: default.user_small
       Truncated Path -> Alias:
-        /user_small [$hdt$_4:user]
+        /user_small [$hdt$_0:user]
#### A masked pattern was here ####
       Needs Tagging: true
       Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
index 8c7658c..eda6619 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
@@ -70,8 +70,11 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-3, Stage-4, Stage-5
+  Stage-8 depends on stages: Stage-3, Stage-4, Stage-5
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -104,6 +107,16 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: int)
+              outputColumnNames: k1, k2
+              Group By Operator
+                aggregations: compute_stats(k1, 16),
compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -128,6 +165,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -141,6 +192,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -247,8 +317,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -281,6 +354,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -291,6 +374,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -305,6 +412,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -318,6 +439,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b @@ -424,8 +564,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -458,6 +601,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -468,6 +621,30 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -482,6 +659,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -495,6 +686,25 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( SELECT a.key key1, a.value value1, b.key key2, b.value value2 FROM tbl1 a JOIN tbl2 b diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out index b53e670..9498f75 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_11.q.out @@ -14,24 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1@ds=1 -PREHOOK: Output: default@test_table2@ds=1 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1@ds=1 -POSTHOOK: Output: default@test_table2@ds=1 -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -40,16 +22,12 @@ POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table3 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = 
'1' AND b.ds = '1' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +71,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +150,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,22 +209,86 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table2 -PREHOOK: Input: default@test_table2@ds=1 -PREHOOK: Output: default@test_table3@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table2 -POSTHOOK: Input: default@test_table2@ds=1 -POSTHOOK: Output: default@test_table3@ds=1 -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_11.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A 
masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: SELECT * FROM test_table1 ORDER BY key PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1827,7 +1895,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2004,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out index 9928a60..bdf550f 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_12.q.out @@ -14,36 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: 
default@test_table1@ds=1 -PREHOOK: Output: default@test_table2@ds=1 -PREHOOK: Output: default@test_table2@ds=2 -PREHOOK: Output: default@test_table2@ds=3 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT * -INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '3') SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1@ds=1 -POSTHOOK: Output: default@test_table2@ds=1 -POSTHOOK: Output: default@test_table2@ds=2 -POSTHOOK: Output: default@test_table2@ds=3 -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=2).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=2).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2 PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY (ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 16 BUCKETS PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -52,16 +22,11 @@ POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) PARTITIONED BY POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table3 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,7 +94,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,26 +172,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table2 -PREHOOK: Input: default@test_table2@ds=1 -PREHOOK: Input: default@test_table2@ds=2 -PREHOOK: 
Input: default@test_table2@ds=3 -PREHOOK: Output: default@test_table3@ds=1 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table2 -POSTHOOK: Input: default@test_table2@ds=1 -POSTHOOK: Input: default@test_table2@ds=2 -POSTHOOK: Input: default@test_table2@ds=3 -POSTHOOK: Output: default@test_table3@ds=1 -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).key SIMPLE [(test_table1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=1).value SIMPLE [(test_table2)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_12.test_table3 + Is Table Level Stats: false + PREHOOK: query: SELECT COUNT(*) FROM test_table3 TABLESAMPLE(BUCKET 2 OUT OF 16) a JOIN test_table1 TABLESAMPLE(BUCKET 2 OUT OF 16) b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -242,18 +195,11 @@ POSTHOOK: Input: default@test_table3 POSTHOOK: Input: default@test_table3@ds=1 #### A masked pattern was here #### 879 -PREHOOK: query: explain extended -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -399,24 +345,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -PREHOOK: type: QUERY -PREHOOK: Input: default@test_table1 -PREHOOK: Input: default@test_table1@ds=1 -PREHOOK: Input: default@test_table3 -PREHOOK: Input: default@test_table3@ds=1 -PREHOOK: Output: default@test_table3@ds=2 -POSTHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') -SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@test_table1 -POSTHOOK: Input: default@test_table1@ds=1 -POSTHOOK: Input: default@test_table3 -POSTHOOK: Input: default@test_table3@ds=1 -POSTHOOK: Output: default@test_table3@ds=2 -POSTHOOK: Lineage: test_table3 PARTITION(ds=2).key SIMPLE [(test_table3)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: test_table3 PARTITION(ds=2).value EXPRESSION [(test_table3)a.FieldSchema(name:value, type:string, comment:null), (test_table1)b.FieldSchema(name:value, type:string, comment:null), ] + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: smb_mapjoin_12.test_table3 + Is Table Level Stats: false + 
PREHOOK: query: SELECT count(*) from test_table3 tablesample (bucket 2 out of 16) a where ds = '2' PREHOOK: type: QUERY PREHOOK: Input: default@test_table3 diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out index 49ff635..6cbb473 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_13.q.out @@ -30,36 +30,6 @@ POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table4 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INSERT OVERWRITE TABLE test_table3 SELECT * -INSERT OVERWRITE TABLE test_table4 SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1 -PREHOOK: Output: default@test_table2 -PREHOOK: Output: default@test_table3 -PREHOOK: Output: default@test_table4 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -INSERT OVERWRITE TABLE test_table3 SELECT * -INSERT OVERWRITE TABLE test_table4 SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1 -POSTHOOK: Output: default@test_table2 -POSTHOOK: Output: default@test_table3 -POSTHOOK: Output: default@test_table4 -POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN EXTENDED SELECT /*+ MAPJOIN(b) */ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 PREHOOK: type: QUERY @@ -112,7 +82,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +105,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +285,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out index ab2b323..d6ca14e 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_16.q.out @@ -14,24 +14,6 @@ POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY ( POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@test_table2 -PREHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@test_table1 -PREHOOK: Output: default@test_table2 -POSTHOOK: query: FROM src -INSERT OVERWRITE TABLE test_table1 SELECT * -INSERT OVERWRITE TABLE test_table2 SELECT * -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@test_table1 -POSTHOOK: Output: default@test_table2 -POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] PREHOOK: query: EXPLAIN SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 82f5804..000ea34 100644 --- a/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -54,50 +54,6 @@ POSTHOOK: query: load data local inpath '../../data/files/empty2.txt' into table POSTHOOK: type: LOAD #### A masked pattern was here #### POSTHOOK: Output: default@smb_bucket4_1 -PREHOOK: query: insert overwrite table smb_bucket4_2 -select * from src -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@smb_bucket4_2 -POSTHOOK: query: insert overwrite table smb_bucket4_2 -select * from src -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@smb_bucket4_2 -POSTHOOK: Lineage: smb_bucket4_2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: smb_bucket4_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: query: insert overwrite table 
smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: query: insert overwrite table smb_join_results_empty_bigtable -select /*+mapjoin(b)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results_empty_bigtable -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results_empty_bigtable.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select * from smb_join_results_empty_bigtable order by k1, v1, k2, v2 PREHOOK: type: QUERY PREHOOK: Input: default@smb_join_results_empty_bigtable @@ -606,20 +562,13 @@ NULL NULL 497 val_497 NULL NULL 498 val_498 NULL NULL 498 val_498 NULL NULL 498 val_498 -PREHOOK: query: explain -insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -POSTHOOK: query: explain -insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage Stage-8 depends on stages: Stage-1 , consists of Stage-5, Stage-4, Stage-6 Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -648,6 +597,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: 
struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +641,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: smb_mapjoin_7.smb_join_results + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -701,22 +678,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### -PREHOOK: query: insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@smb_join_results -POSTHOOK: query: insert overwrite table smb_join_results -select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@smb_join_results -POSTHOOK: Lineage: smb_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: smb_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select * from smb_join_results order by k1, v1, k2, v2 PREHOOK: type: QUERY PREHOOK: Input: default@smb_join_results @@ -1225,20 +1186,6 @@ NULL NULL 497 val_497 NULL NULL 498 val_498 NULL NULL 498 val_498 NULL NULL 498 val_498 -PREHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@smb_bucket4_1 -PREHOOK: Input: default@smb_bucket4_2 -PREHOOK: Output: default@normal_join_results -POSTHOOK: query: insert overwrite table normal_join_results select * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@smb_bucket4_1 -POSTHOOK: Input: default@smb_bucket4_2 -POSTHOOK: Output: default@normal_join_results -POSTHOOK: Lineage: normal_join_results.k1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: normal_join_results.k2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: normal_join_results.v1 SIMPLE [(smb_bucket4_1)a.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: normal_join_results.v2 SIMPLE [(smb_bucket4_2)b.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: select sum(hash(k1)) as k1, sum(hash(k2)) as k2, sum(hash(v1)) as v1, sum(hash(v2)) as v2 from normal_join_results PREHOOK: type: QUERY PREHOOK: Input: default@normal_join_results diff --git 
a/ql/src/test/results/clientpositive/binary_output_format.q.out b/ql/src/test/results/clientpositive/binary_output_format.q.out index 96afeb6..c589541 100644 --- a/ql/src/test/results/clientpositive/binary_output_format.q.out +++ b/ql/src/test/results/clientpositive/binary_output_format.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -117,6 +118,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string) + outputColumnNames: mydata + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(mydata, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -168,6 +185,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0 + columns.types struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -211,6 +257,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: mydata + Column Types: string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out index 78571aa..68f0c2b 100644 --- a/ql/src/test/results/clientpositive/bucket1.q.out +++ b/ql/src/test/results/clientpositive/bucket1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -129,6 +131,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +192,83 @@ Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket1_1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket1_1 select * from src PREHOOK: type: QUERY
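
In the bucket1.q.out plan above, the map side computes partial statistics (a Group By Operator running compute_stats in hash mode) into a temporary SequenceFile, and the added Stage-3 job merges the partials (mode: mergepartial) before Stage-4's Column Stats Work persists them. Since compute_stats is a regular Hive UDAF, the aggregation can be sketched by hand; the literal 16 is the number of bit vectors backing the distinct-value estimate, and running it directly like this is an illustration, not part of the patch:

-- hand-run the aggregation the autogathered stats stage performs;
-- yields one stats struct (min/max, null count, NDV sketch, ...) per column
SELECT compute_stats(key, 16), compute_stats(value, 16)
FROM default.bucket1_1;

diff --git a/ql/src/test/results/clientpositive/bucket2.q.out 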
b/ql/src/test/results/clientpositive/bucket2.q.out index a8e9f90..fbc7ef7 100644 --- a/ql/src/test/results/clientpositive/bucket2.q.out +++ b/ql/src/test/results/clientpositive/bucket2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,6 +130,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -163,6 +199,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/bucket3.q.out b/ql/src/test/results/clientpositive/bucket3.q.out index b1173e7..10ae348 100644 --- a/ql/src/test/results/clientpositive/bucket3.q.out +++ b/ql/src/test/results/clientpositive/bucket3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -127,6 +129,34 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -160,6 +190,90 @@ Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY
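
bucket3_1 is partitioned on ds, so the stats pipeline above keys its Group By Operator on the partition column and the Column Stats Work stage records Is Table Level Stats: false, meaning the statistics land on the partition rather than the table; the COLUMN_STATS_ACCURATE property gaining a per-column COLUMN_STATS map in later hunks reflects the same bookkeeping. A hedged sketch of the manual per-partition equivalent and of where the stored flag shows up (identifiers mirror the hunk above):

-- per-partition column statistics, matching the keyed compute_stats plan
ANALYZE TABLE default.bucket3_1 PARTITION (ds='1') COMPUTE STATISTICS FOR COLUMNS;

-- the partition parameters then list COLUMN_STATS_ACCURATE with a per-column map
DESCRIBE FORMATTED default.bucket3_1 PARTITION (ds='1');

diff --git 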
a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out index 5478f12..e111b02 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out @@ -117,13 +117,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -197,7 +199,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -257,6 +259,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -393,6 +422,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -451,13 +557,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -531,7 +639,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -570,7 +678,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -591,6 +699,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -704,7 +839,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +862,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out index e335683..070be5f 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types 
struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:b @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 0 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types 
struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out index bd5a438..a1be9dd 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out @@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -181,7 +183,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -241,6 +243,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -377,6 +406,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: 
-mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b on a.key=b.key and b.ds="2008-04-08" POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:a @@ -515,7 +623,7 @@ STAGE PLANS: 1 _col0 (type: int) Position of Big Table: 1 - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -554,7 +662,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -575,6 +683,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 
6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -688,7 +823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -711,6 +846,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was 
here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out index 5743944..688fdfa 100644 --- a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out @@ -189,7 +189,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -212,7 +212,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -239,7 +239,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -262,7 +262,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -289,7 +289,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -312,7 +312,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -545,7 +545,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -568,7 +568,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -618,7 +618,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -645,7 +645,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -668,7 +668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out index 71b2924..406cca4 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -192,7 +192,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -241,7 +241,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -371,7 +371,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -480,7 +480,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -615,7 +615,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -724,7 +724,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -859,7 +859,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -968,7 +968,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out index febc2b6..faae2ea 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out @@ -181,6 +181,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -272,6 +273,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -376,6 +393,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + 
hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -418,6 +464,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -722,6 +776,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -792,7 +847,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -813,6 +868,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -917,6 +988,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -936,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -959,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -973,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1003,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1024,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1060,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1090,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1111,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out index ee075c1..05a76e2 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out @@ -82,6 +82,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on 
stages: Stage-5 @@ -212,6 +213,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -265,6 +282,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -307,6 +353,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out index 2728bb7..edca088 100644 --- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out @@ -91,6 +91,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -275,6 +276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -328,6 +345,35 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -370,6 +416,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out index 165f0dc..132ec23 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +85,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from ( @@ -153,6 +161,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -189,6 +198,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT * from ( @@ -258,6 +274,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -294,6 +311,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: 
Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key+x.key, x.value from @@ -312,6 +336,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -343,6 +369,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -359,6 +401,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.k1, concat(x.v1, x.v1) from @@ -377,6 +455,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -413,3 +492,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out index c5e03be..7a97b1f 100644 --- 
a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -80,6 +81,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.value, x.key from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x @@ -151,6 +159,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -182,6 +192,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), '1' (type: string) + outputColumnNames: value, key, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -198,6 +224,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT x.key, x.value from (SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
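The added stages above all share one shape: a Select feeding a hash-mode compute_stats Group By, a merge of the partial stats, and a final Column Stats Work stage against the target table. A minimal HiveQL sketch of the behaviour these golden files appear to exercise, assuming the hive.stats.column.autogather switch drives it; demo_target is an illustrative name, not one of the test tables:

-- Hedged sketch: assumes hive.stats.column.autogather enables the extra
-- stats stages; demo_target is hypothetical, src is the standard test table.
SET hive.stats.column.autogather=true;

CREATE TABLE demo_target (key INT, value STRING) PARTITIONED BY (ds STRING);

-- With the flag on, EXPLAIN should show the compute_stats Group By and a
-- trailing "Column Stats Work" stage next to the ordinary insert stages.
EXPLAIN
INSERT OVERWRITE TABLE demo_target PARTITION (ds = '1')
SELECT key, value FROM src;

diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out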
b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out index 1d794c3..252d6ab 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out @@ -65,39 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -106,7 +81,7 @@ STAGE PLANS: Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -121,8 +96,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -134,6 +107,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -150,98 +137,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 key (type: int) - 1 key (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - 
Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 key (type: int) - 1 key (type: int) - outputColumnNames: _col0, _col1, _col7 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col7) (type: string) - outputColumnNames: _col1, _col2 - Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) - value expressions: _col2 (type: string) + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) @@ -339,43 +265,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE 
DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -388,7 +285,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -400,8 +297,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) @@ -413,6 +308,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -429,104 +338,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null 
(type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value
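In the insert_4 and insert_5 plans above, the Conditional Operator and its backup Map Join stages collapse into a single Sorted Merge Bucket Map Join stage. A sketch of the standard switches behind that conversion, with hypothetical tables standing in for test_table1 and test_table2 (both bucketed and sorted on the join key):

-- Hedged sketch: the flags are the usual sorted-merge bucket join switches;
-- demo_t1/demo_t2 are illustrative stand-ins for the test tables.
SET hive.optimize.bucketmapjoin=true;
SET hive.optimize.bucketmapjoin.sortedmerge=true;
SET hive.auto.convert.sortmerge.join=true;

CREATE TABLE demo_t1 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS;
CREATE TABLE demo_t2 (key INT, value STRING) PARTITIONED BY (ds STRING)
CLUSTERED BY (key) SORTED BY (key ASC) INTO 2 BUCKETS;

-- When both sides are bucketed and sorted on the join key, EXPLAIN shows a
-- "Sorted Merge Bucket Map Join Operator" in one map-side stage instead of a
-- Conditional Operator with backup Map Join stages.
EXPLAIN
SELECT a.key, concat(a.value, b.value)
FROM demo_t1 a JOIN demo_t2 b ON a.key = b.key
WHERE a.ds = '1' AND b.ds = '1';

diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out index 1e70105..fb90070 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out +++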
b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out @@ -65,43 +65,14 @@ FROM test_table1 a JOIN test_table2 b ON a.key = b.key WHERE a.ds = '1' and b.ds = '1' POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:b - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -114,7 +85,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -129,8 +100,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -142,6 +111,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -158,110 +141,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:a - TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 
(type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col4 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col4) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) @@ -346,43 +256,14 @@ JOIN ON a.key = b.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1 - Stage-7 has a backup stage: Stage-1 - Stage-4 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-1, Stage-4, Stage-5 + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 - Stage-8 has a backup stage: Stage-1 - Stage-5 depends on stages: Stage-8 - Stage-1 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-6 - Conditional Operator - - Stage: Stage-7 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_1:test_table2 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_1:test_table2 - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - - Stage: Stage-4 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan @@ -395,7 +276,7 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Map Join Operator + Sorted Merge Bucket Map Join Operator condition map: Inner Join 0 to 1 keys: @@ -410,8 +291,6 @@ STAGE PLANS: sort order: - Map-reduce partition columns: _col0 (type: int) value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) @@ -423,6 +302,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -439,110 +332,37 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator - Stage: Stage-8 - Map Reduce Local Work - Alias -> Map Local Tables: - $hdt$_0:test_table1 - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - $hdt$_0:test_table1 - TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select 
Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - HashTable Sink Operator - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - Stage: Stage-5 - Map Reduce - Map Operator Tree: - TableScan - alias: test_table2 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) - Local Work: - Map Reduce Local Work - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 - Stage: Stage-1 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan - alias: test_table1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Operator - expressions: _col0 (type: int), concat(_col1, _col3) (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: - - Map-reduce partition columns: _col0 (type: int) - value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.test_table3 + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, 
_col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out index f3d3006..29f8896 100644 --- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out @@ -68,6 +68,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -116,6 +117,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -197,6 +205,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -245,6 +254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/case_sensitivity.q.out b/ql/src/test/results/clientpositive/case_sensitivity.q.out index b3969cc..0158826 100644 --- a/ql/src/test/results/clientpositive/case_sensitivity.q.out +++ b/ql/src/test/results/clientpositive/case_sensitivity.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out index 9feb14f..9967c5b 100644 --- a/ql/src/test/results/clientpositive/cast1.q.out +++ b/ql/src/test/results/clientpositive/cast1.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -44,6 +45,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: string), _col6 (type: int) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7 + Column Types: int, double, double, double, int, string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree:
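The next file shows optimizer annotations moving from Column stats: PARTIAL or NONE to COMPLETE once column statistics are present. The explicit counterpart of the auto-gathered stages is the analyze command the test itself runs; a short sketch using the test's own loc_orc table:

-- Hedged sketch: manual column statistics collection, the explicit
-- equivalent of the auto-gathered compute_stats stages above.
ANALYZE TABLE loc_orc COMPUTE STATISTICS FOR COLUMNS state;

-- Afterwards the plans annotate covered operators with
-- "Column stats: COMPLETE", and the per-column stats can be inspected:
DESCRIBE FORMATTED loc_orc state;

diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out index f260f03..d94cf30 100644 ---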
a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out @@ -66,11 +66,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int) outputColumnNames: state, locid, zip, year - Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc compute statistics for columns state @@ -106,22 +106,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sq1:loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: state, locid - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: state (type: string), locid (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) - Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator @@ -129,13 +129,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: int) mode: mergepartial outputColumnNames: state, locid, $f2 - Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(locid) keys: state (type: string), $f2 (type: bigint) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -151,7 +151,7 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reduce Operator Tree: Group By Operator @@ -159,10 +159,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, $f2, $f2_0 - Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 7 Data size: 686 Basic 
stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -767,30 +767,30 @@ STAGE PLANS: Map Operator Tree: TableScan alias: loc_orc - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: state (type: string), zip (type: bigint) outputColumnNames: state, zip - Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: state (type: string), zip (type: bigint) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: bigint) mode: mergepartial outputColumnNames: state, zip - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out index b296280..f8fd963 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out @@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: src1 @@ -44,7 +46,7 @@ STAGE PLANS: 0 key (type: string) 1 key (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By 
Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -93,6 +110,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index d4d70bc..05ca4bd 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -65,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -79,6 +96,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) @@ -132,6 +182,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -175,6 +227,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -189,6 +256,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) diff --git a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index c09764c..5ce2699 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5720 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +109,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out index af21343..48c6d1e 100644 --- a/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out +++ b/ql/src/test/results/clientpositive/columnStatsUpdateForStatsOptimizer_2.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -79,7 +79,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 2 numRows 3 rawDataSize 24 @@ -168,7 +168,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\"}} #### A masked pattern was here #### numFiles 2 numRows 3 @@ -191,46 +191,12 @@ PREHOOK: type: QUERY POSTHOOK: query: explain select max(month) from calendar POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: calendar - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: month (type: int) - outputColumnNames: month - Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: max(month) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) - Reduce Operator Tree: - Group By Operator - aggregations: max(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - 
limit: -1 + limit: 1 Processor Tree: ListSink @@ -322,29 +288,29 @@ STAGE PLANS: Map Operator Tree: TableScan alias: calendar - Statistics: Num rows: 136 Data size: 547 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: month (type: int) outputColumnNames: month - Statistics: Num rows: 136 Data size: 547 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 136 Data size: 544 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(month) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/constprog_dp.q.out b/ql/src/test/results/clientpositive/constprog_dp.q.out index 8cf301d..b0e4d95 100644 --- a/ql/src/test/results/clientpositive/constprog_dp.q.out +++ b/ql/src/test/results/clientpositive/constprog_dp.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 
5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -68,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out index 00bdb4c..b783d8a 100644 --- a/ql/src/test/results/clientpositive/correlationoptimizer5.q.out +++ b/ql/src/test/results/clientpositive/correlationoptimizer5.q.out @@ -106,10 +106,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-5 + Stage-2 depends on stages: Stage-1, Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 - Stage-5 is a root stage + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -212,7 +229,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-5 + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -314,8 +360,12 @@ ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 - Stage-2 depends 
on stages: Stage-0 + Stage-2 depends on stages: Stage-1, Stage-6 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 + Stage-6 is a root stage STAGE PLANS: Stage: Stage-1 @@ -351,6 +401,120 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1447 Data size: 5791 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1598 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest_co2 + + Stage: Stage-3 + Stats-Aggr Operator + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 
Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: TableScan alias: m Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: NONE @@ -383,85 +547,24 @@ STAGE PLANS: Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Reduce Operator Tree: - Demux Operator - Statistics: Num rows: 2956 Data size: 12099 Basic stats: COMPLETE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Mux Operator - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col2 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-0 - Move Operator - tables: - replace: true - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest_co2 - - Stage: Stage-2 - Stats-Aggr Operator + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 59 Data size: 237 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe PREHOOK: query: INSERT OVERWRITE TABLE dest_co2 SELECT b.key, d.val @@ -510,21 +613,23 @@ JOIN ON b.key = d.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-14 is a root stage - Stage-10 depends on stages: Stage-14 - Stage-9 depends on stages: Stage-10, Stage-11 , consists of Stage-12, Stage-13, Stage-2 - Stage-12 has a backup stage: Stage-2 - Stage-7 depends on stages: Stage-12 - Stage-0 depends on stages: Stage-2, Stage-7, Stage-8 - Stage-3 depends on stages: Stage-0 + Stage-15 is a root stage + Stage-11 depends on stages: Stage-15 + Stage-10 depends on stages: Stage-11, Stage-12 , consists of Stage-13, Stage-14, Stage-2 Stage-13 has a backup stage: Stage-2 Stage-8 depends on stages: Stage-13 + Stage-0 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-3 depends on stages: Stage-0 + Stage-17 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-8, Stage-9 + Stage-14 has a backup stage: Stage-2 + Stage-9 depends on stages: Stage-14 Stage-2 - Stage-15 is a root stage - Stage-11 depends on stages: Stage-15 + Stage-16 is a root stage + Stage-12 depends on stages: Stage-16 STAGE PLANS: - Stage: Stage-14 + Stage: Stage-15 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -547,7 +652,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -577,10 +682,10 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-9 + Stage: Stage-10 Conditional Operator - Stage: Stage-12 + Stage: Stage-13 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -594,7 +699,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-7 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -618,6 +723,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -634,7 +754,36 @@ STAGE PLANS: Stage: Stage-3 
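The plan fragments added throughout these golden files follow one template: a Select Operator over the rows just written, a hash-mode Group By Operator computing compute_stats(col, 16) for each column (16 is the number of bit vectors used by the distinct-value estimator), a mergepartial reduce, and a final Column Stats Work stage that publishes the results to the metastore. This is the column-statistics autogather path for INSERT OVERWRITE. A minimal HiveQL sketch that should reproduce a plan of this shape, assuming the hive.stats.column.autogather setting is available and using a scratch table dest_demo that does not appear in these tests:

    -- Enable basic and column statistics collection on insert.
    -- hive.stats.column.autogather is the switch these golden-file
    -- updates reflect; dest_demo is an illustrative table.
    SET hive.stats.autogather=true;
    SET hive.stats.column.autogather=true;

    CREATE TABLE dest_demo (key INT, val STRING);

    -- EXPLAIN should now show the extra compute_stats(key, 16),
    -- compute_stats(val, 16) Group By branch plus a trailing
    -- "Column Stats Work" stage, as in the hunks above.
    EXPLAIN
    INSERT OVERWRITE TABLE dest_demo
    SELECT CAST(key AS INT), value FROM src;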
Stats-Aggr Operator - Stage: Stage-13 + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, string + Table: default.dest_co3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -648,7 +797,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-8 + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -672,6 +821,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -712,8 +876,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_co3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 1757 Data size: 7032 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:$hdt$_1:m @@ -736,7 +915,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-11 + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/cp_sel.q.out b/ql/src/test/results/clientpositive/cp_sel.q.out index 1778ccd..5259051 100644 --- a/ql/src/test/results/clientpositive/cp_sel.q.out +++ b/ql/src/test/results/clientpositive/cp_sel.q.out @@ -78,6 +78,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -109,6 +111,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testpartbucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -126,6 +144,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.testpartbucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table testpartbucket partition(ds,hr) select key,value,'hello' as ds, 'world' as hr from srcpart where hr=11 PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/deleteAnalyze.q.out index 4382522..1f88831 100644 --- a/ql/src/test/results/clientpositive/deleteAnalyze.q.out +++ b/ql/src/test/results/clientpositive/deleteAnalyze.q.out @@ -48,6 +48,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} numFiles 1 totalSize 578 #### A masked pattern was here #### @@ -71,7 +72,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@testdeci2 # col_name data_type 
min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment -amount decimal(10,3) from deserializer +amount decimal(10,3) 12.123 123.123 0 2 from deserializer +COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}} PREHOOK: query: analyze table testdeci2 compute statistics for columns PREHOOK: type: QUERY PREHOOK: Input: default@testdeci2 diff --git a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out index d199574..b4efaf4 100644 --- a/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out +++ b/ql/src/test/results/clientpositive/dynamic_partition_skip_default.q.out @@ -61,7 +61,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -130,7 +130,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -199,7 +199,7 @@ STAGE PLANS: partcol1 1 partcol2 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol @@ -245,7 +245,7 @@ STAGE PLANS: partcol1 1 partcol2 __HIVE_DEFAULT_PARTITION__ properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}} bucket_count -1 column.name.delimiter , columns intcol diff --git a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out index 2abb819..0b210d0 100644 --- a/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/dynamic_rdd_cache.q.out @@ -220,9 +220,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -282,6 +286,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -296,11 +315,47 @@ STAGE PLANS: Stage: 
Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -325,6 +380,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -336,9 +406,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT dest1.* FROM dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -402,6 +494,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -461,6 +554,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -476,6 +582,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE @@ -491,6 +610,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -514,6 +659,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out 
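The deleteAnalyze.q.out hunk above shows the user-visible side of the same change: once column statistics exist, COLUMN_STATS_ACCURATE carries a per-column map and DESCRIBE reports min, max, num_nulls, and distinct_count instead of blanks. A short sketch of how to produce and inspect that state by hand, reusing the test's own testdeci2 table and its amount column:

    -- Gather column statistics for every column, as the test does.
    ANALYZE TABLE testdeci2 COMPUTE STATISTICS FOR COLUMNS;

    -- Per-column detail: data_type, min, max, num_nulls, distinct_count.
    DESCRIBE FORMATTED testdeci2 amount;

    -- Table-level view: COLUMN_STATS_ACCURATE under Table Parameters.
    DESCRIBE FORMATTED testdeci2;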
index 76d0b7b..fa1b3d3 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -71,3 +72,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.non_acid + diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out index 5d894ab..18b2eb0 100644 --- a/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out +++ b/ql/src/test/results/clientpositive/encrypted/encryption_join_unencrypted_tbl.q.out @@ -595,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -616,7 +616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/explain_ddl.q.out b/ql/src/test/results/clientpositive/explain_ddl.q.out index e108e22..079b1f2 100644 --- a/ql/src/test/results/clientpositive/explain_ddl.q.out +++ b/ql/src/test/results/clientpositive/explain_ddl.q.out @@ -496,6 +496,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -519,6 +520,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.m1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -542,6 +569,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.m1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out index b212da9..323b1d5 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_full.q.out @@ -102,7 +102,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -147,7 +147,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -215,7 +215,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -260,7 +260,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip diff --git a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out index b5f4fee..1e4a999 100644 --- a/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out +++ b/ql/src/test/results/clientpositive/extrapolate_part_stats_partial.q.out @@ -119,7 +119,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -164,7 +164,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -209,7 +209,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -254,7 +254,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -296,12 +296,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1780 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_1d @@ -322,7 +322,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -367,7 +367,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -412,7 +412,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -457,7 +457,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -499,12 +499,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: analyze table loc_orc_1d partition(year='2000') compute statistics for columns state @@ -545,7 +545,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -590,7 +590,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -635,7 +635,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -680,7 +680,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -748,7 +748,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -793,7 +793,7 @@ STAGE PLANS: partition values: year 2001 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -838,7 +838,7 @@ STAGE PLANS: partition values: year 2002 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -883,7 +883,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"state":"true"}} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,zip @@ -925,12 +925,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_1d - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: create table if not exists loc_orc_2d ( @@ -1026,7 +1026,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1072,7 +1072,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1118,7 +1118,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1164,7 +1164,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1256,7 +1256,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1302,7 +1302,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1348,7 +1348,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1394,7 +1394,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1486,7 +1486,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1528,12 +1528,12 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string) outputColumnNames: _col0 - Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE ListSink PREHOOK: query: explain extended select state,locid from loc_orc_2d @@ -1555,7 +1555,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1601,7 +1601,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1647,7 +1647,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1693,7 +1693,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1785,7 +1785,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1831,7 +1831,7 @@ STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1877,7 +1877,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -1923,7 +1923,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2015,7 +2015,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid @@ -2057,11 +2057,11 @@ STAGE PLANS: Processor Tree: TableScan alias: loc_orc_2d - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: state (type: string), locid (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 1840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 20 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE ListSink diff --git a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out index 8f9b636..884bfcd 100644 --- a/ql/src/test/results/clientpositive/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/filter_join_breaktask.q.out @@ -89,7 +89,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -129,7 +129,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:f, $hdt$_1:m] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_1:f, $hdt$_2:m] Needs Tagging: true Reduce Operator Tree: Join Operator @@ -226,7 +226,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: name: default.filter_join_breaktask name: default.filter_join_breaktask Truncated Path -> Alias: - /filter_join_breaktask/ds=2008-04-08 [$hdt$_2:g] + /filter_join_breaktask/ds=2008-04-08 [$hdt$_0:g] #### A masked pattern was here #### Needs Tagging: true Reduce Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby10.q.out b/ql/src/test/results/clientpositive/groupby10.q.out index 66832b0..2a00eb8 100644 --- a/ql/src/test/results/clientpositive/groupby10.q.out +++ b/ql/src/test/results/clientpositive/groupby10.q.out @@ -45,10 +45,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on 
stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -119,6 +125,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -133,11 +149,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -156,7 +230,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -185,6 +259,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -196,9 +280,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -290,10 +418,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -364,6 +498,16 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -378,11 +522,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: int), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: int) @@ -401,7 +603,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -430,6 +632,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -441,9 +653,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key @@ -534,8 +790,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -574,6 +834,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -592,6 +862,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -606,6 +886,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -616,9 +932,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM INPUT INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) 
GROUP BY INPUT.key diff --git a/ql/src/test/results/clientpositive/groupby11.q.out b/ql/src/test/results/clientpositive/groupby11.q.out index 1d0e86a..da6e0cc 100644 --- a/ql/src/test/results/clientpositive/groupby11.q.out +++ b/ql/src/test/results/clientpositive/groupby11.q.out @@ -33,10 +33,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -107,6 +113,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -123,11 +139,78 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: substr(value, 5) (type: string), key (type: string) sort order: ++ Map-reduce partition columns: substr(value, 5) (type: string) @@ -146,7 +229,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -175,6 +258,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -188,9 +281,62 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + 
mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 partition(ds='111') SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value diff --git a/ql/src/test/results/clientpositive/groupby12.q.out b/ql/src/test/results/clientpositive/groupby12.q.out index 921fc92..0077fac 100644 --- a/ql/src/test/results/clientpositive/groupby12.q.out +++ b/ql/src/test/results/clientpositive/groupby12.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT COUNT(src.key), COUNT(DISTINCT value) GROUP BY src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby1_limit.q.out b/ql/src/test/results/clientpositive/groupby1_limit.q.out index 78a49eb..56a8c02 100644 --- a/ql/src/test/results/clientpositive/groupby1_limit.q.out +++ b/ql/src/test/results/clientpositive/groupby1_limit.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 
depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -40,7 +42,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double) Reduce Operator Tree: Group By Operator @@ -66,7 +67,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -88,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key ORDER BY src.key LIMIT 5 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map.q.out b/ql/src/test/results/clientpositive/groupby1_map.q.out index cc985a5..a69a482 100644 --- a/ql/src/test/results/clientpositive/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out index cc985a5..a69a482 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_nomap.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +90,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map 
Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out index 116744a..bdbd7fb 100644 --- a/ql/src/test/results/clientpositive/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_map_skew.q.out @@ -17,6 +17,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -84,6 +86,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +115,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: 
Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/groupby1_noskew.q.out index 98c0d3c..4dbd9fe 100644 --- a/ql/src/test/results/clientpositive/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby1_noskew.q.out @@ -16,6 +16,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -53,6 +55,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -67,6 +79,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby2_map.q.out b/ql/src/test/results/clientpositive/groupby2_map.q.out index 0dcd810..b03af2d 100644 --- a/ql/src/test/results/clientpositive/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 64477db..3508f6e 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY @@ -120,6 +166,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -163,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -177,6 +240,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 813ae5c..36b70bb 100644 --- a/ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -86,6 +88,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +117,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/groupby2_noskew.q.out index 5192db3..3065e94 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -18,6 
+18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +80,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index 1c24213..8a52ddc 100644 --- a/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: 
Stage-0 Move Operator @@ -69,6 +81,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby3.q.out b/ql/src/test/results/clientpositive/groupby3.q.out index 2ebeae4..648afc9 100644 --- a/ql/src/test/results/clientpositive/groupby3.q.out +++ b/ql/src/test/results/clientpositive/groupby3.q.out @@ -37,6 +37,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,7 @@ STAGE PLANS: TableScan Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reduce Operator Tree: @@ -93,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +125,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map.q.out b/ql/src/test/results/clientpositive/groupby3_map.q.out index 07c122e..9fe74de 100644 --- a/ql/src/test/results/clientpositive/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out index a4501f7..02d7bfd 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, 
double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out index e02bdeb..621339d 100644 --- a/ql/src/test/results/clientpositive/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_map_skew.q.out @@ -37,6 +37,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -100,6 +101,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +135,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/groupby3_noskew.q.out index 624fd2d..8771ecd 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -70,6 +71,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 
(type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -84,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out index a1d403d..aa282aa 100644 --- a/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -88,6 +105,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, 
c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/groupby4.q.out b/ql/src/test/results/clientpositive/groupby4.q.out index 3f77e47..8ca3d8e 100644 --- a/ql/src/test/results/clientpositive/groupby4.q.out +++ b/ql/src/test/results/clientpositive/groupby4.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby4_map.q.out b/ql/src/test/results/clientpositive/groupby4_map.q.out index a8a7fbe..a506770 100644 --- a/ql/src/test/results/clientpositive/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map.q.out @@ -16,6 
+16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out index 3f38895..7048766 100644 --- a/ql/src/test/results/clientpositive/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +88,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/groupby4_noskew.q.out index 
c7db0d7..c68dcbb 100644 --- a/ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby5.q.out b/ql/src/test/results/clientpositive/groupby5.q.out index 9bf01ee..a65baa7 100644 --- a/ql/src/test/results/clientpositive/groupby5.q.out +++ b/ql/src/test/results/clientpositive/groupby5.q.out @@ -23,6 +23,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -84,6 +87,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -98,6 +111,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + 
Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby5_map.q.out b/ql/src/test/results/clientpositive/groupby5_map.q.out index 5fbd3d7..87b1ef8 100644 --- a/ql/src/test/results/clientpositive/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff 
--git a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out index 60b010b..6c21d2a 100644 --- a/ql/src/test/results/clientpositive/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/groupby5_noskew.q.out index 612a0f6..d71f71c 100644 --- a/ql/src/test/results/clientpositive/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby5_noskew.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -59,6 +61,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/groupby6.q.out b/ql/src/test/results/clientpositive/groupby6.q.out index b790224..c874ba1 100644 --- a/ql/src/test/results/clientpositive/groupby6.q.out +++ b/ql/src/test/results/clientpositive/groupby6.q.out @@ -19,6 +19,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -72,6 +75,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -86,6 +99,57 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src 
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map.q.out b/ql/src/test/results/clientpositive/groupby6_map.q.out index 4ba3772..3432642 100644 --- a/ql/src/test/results/clientpositive/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -54,6 +56,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -68,6 +85,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 5141c0d..69e4179 100644 --- a/ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -19,6 +19,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +79,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -91,6 +108,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/groupby6_noskew.q.out index fd796c7..2b4d0a1 100644 --- a/ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -49,6 +51,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -63,6 +75,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE 
dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby7_map.q.out b/ql/src/test/results/clientpositive/groupby7_map.q.out index 0ef29cd..371f7a5 100644 --- a/ql/src/test/results/clientpositive/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -90,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,11 +123,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) @@ -133,6 +188,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +214,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out index 7c3b033..b9857d1 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_multi_single_reducer.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -69,6 +73,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -87,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,6 +135,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -111,9 +181,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out index 4bfa52e..51b426a 100644 --- a/ql/src/test/results/clientpositive/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on 
stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -116,6 +120,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -130,11 +149,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: rand() (type: double) @@ -154,7 +209,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -183,6 +238,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -194,9 +264,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/groupby7_noskew.q.out index 6178f58..06913d9 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -78,6 +82,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,11 +106,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string) sort order: + Map-reduce partition columns: key (type: string) @@ -121,6 +171,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -132,9 +192,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out index f38c428..c88f70b 100644 --- a/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby7_noskew_multi_single_reducer.q.out @@ -29,9 +29,13 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-5 - Stage-6 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-1 
depends on stages: Stage-6 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-6 STAGE PLANS: Stage: Stage-2 @@ -108,6 +112,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -122,11 +136,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -152,6 +202,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -163,9 +223,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + 
Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 diff --git a/ql/src/test/results/clientpositive/groupby8.q.out b/ql/src/test/results/clientpositive/groupby8.q.out index 1856a92..695c3c8 100644 --- a/ql/src/test/results/clientpositive/groupby8.q.out +++ b/ql/src/test/results/clientpositive/groupby8.q.out @@ -29,10 +29,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -103,6 +109,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -117,11 +133,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 
(type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -140,7 +214,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -169,6 +243,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -180,9 +264,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: 
FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key @@ -850,10 +978,16 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 + Stage-12 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-13 depends on stages: Stage-4, Stage-6, Stage-9, Stage-11 + Stage-5 depends on stages: Stage-3 Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 + Stage-8 depends on stages: Stage-7 + Stage-1 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-2 @@ -924,6 +1058,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -938,11 +1082,69 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: key (type: string), substr(value, 5) (type: string) sort order: ++ Map-reduce partition columns: key (type: string) @@ -961,7 +1163,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-8 Map Reduce Map Operator Tree: TableScan @@ -990,6 +1192,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1001,9 +1213,53 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map.q.out b/ql/src/test/results/clientpositive/groupby8_map.q.out index f683a8b..c887dd5 100644 --- a/ql/src/test/results/clientpositive/groupby8_map.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage 
Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +105,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +134,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +180,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - 
Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out index 5e60d3e..c068224 100644 --- a/ql/src/test/results/clientpositive/groupby8_map_skew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_map_skew.q.out @@ -29,10 +29,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -115,6 +119,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -129,11 +148,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + 
Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -152,7 +207,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -181,6 +236,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -192,9 +262,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/groupby8_noskew.q.out index f683a8b..1f98658 100644 --- a/ql/src/test/results/clientpositive/groupby8_noskew.q.out +++ b/ql/src/test/results/clientpositive/groupby8_noskew.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: 
Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -68,6 +72,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) @@ -86,6 +100,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -100,6 +124,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -110,9 +170,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key diff --git a/ql/src/test/results/clientpositive/groupby9.q.out b/ql/src/test/results/clientpositive/groupby9.q.out index 15ea185..f3d32d2 100644 --- a/ql/src/test/results/clientpositive/groupby9.q.out +++ b/ql/src/test/results/clientpositive/groupby9.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 
(type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -812,9 +904,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -873,6 +969,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -887,11 +998,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -915,6 +1062,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -926,9 +1088,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key @@ -1596,9 +1780,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1657,6 +1845,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1671,11 +1874,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -1699,6 +1938,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: 
string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -1710,9 +1964,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -2380,9 +2656,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -2442,6 +2722,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -2456,11 +2751,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + 
Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -2485,6 +2816,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -2496,9 +2842,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value @@ -3166,9 +3534,13 @@ STAGE DEPENDENCIES: 
Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -3227,6 +3599,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -3241,11 +3628,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -3269,6 +3692,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -3280,9 +3718,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key diff --git a/ql/src/test/results/clientpositive/groupby_cube1.q.out b/ql/src/test/results/clientpositive/groupby_cube1.q.out index fd70a2c..1b14461 100644 --- a/ql/src/test/results/clientpositive/groupby_cube1.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube1.q.out @@ -552,10 +552,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -640,6 +644,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -654,11 +673,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, 
key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -678,7 +733,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -708,6 +763,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -719,9 +789,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with cube INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with cube diff --git a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out index 1e13288..590a23c 100644 --- a/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out +++ b/ql/src/test/results/clientpositive/groupby_cube_multi_gby.q.out @@ -34,9 +34,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -92,6 +96,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,11 +125,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) @@ -130,6 +185,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select 
Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -141,6 +211,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out index 9be41df..90c3dba 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 
+ Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out index 606ab4e..6e3336e 100644 --- a/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -198,6 +200,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 
(type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -231,6 +260,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct,struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out index 5f02b04..7ed6fd8 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_insert_common_distinct.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -89,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -103,11 +122,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: double) @@ -131,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -142,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table dest1 select key, count(distinct value) group by key insert overwrite table dest2 select key+key, count(distinct value) group by key+key diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 256784d..00fd1b4 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -54,10 +54,16 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 Stage-0 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-6 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -97,6 +103,21 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +139,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -139,6 +175,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -153,6 +204,49 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-11 + Column Stats Work + 
Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -163,9 +257,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -176,9 +292,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-6 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) @@ -276,16 +414,26 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-18 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-19 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-20 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-21 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-22 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-11, Stage-14, Stage-15, Stage-16, Stage-17 + Stage-7 depends on stages: Stage-5 Stage-0 depends on stages: Stage-5 - Stage-7 depends on stages: Stage-0 - Stage-1 depends on stages: Stage-5 - Stage-8 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-0 Stage-9 depends on stages: Stage-5 - Stage-10 depends on stages: Stage-9 - Stage-3 depends on stages: Stage-10 - Stage-11 depends on stages: Stage-3 - Stage-4 depends on stages: Stage-9 - Stage-12 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-5 + Stage-10 depends on stages: Stage-1 + Stage-11 depends on stages: Stage-5 + Stage-12 depends on stages: Stage-5 + Stage-13 depends on stages: Stage-12 + Stage-3 depends on stages: Stage-13 + Stage-14 depends on stages: Stage-3 + Stage-15 depends on stages: Stage-13 + Stage-4 depends on stages: Stage-12 + Stage-16 depends on stages: Stage-4 + Stage-17 depends on stages: Stage-12 STAGE PLANS: Stage: Stage-5 @@ -335,6 +483,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -356,6 
+519,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -377,6 +555,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -391,6 +584,63 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-18 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), 
compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -401,9 +651,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -414,10 +686,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-12 Map Reduce Map Operator Tree: TableScan @@ -467,8 +761,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 
16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-10 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -476,7 +785,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Reduce Operator Tree: Select Operator @@ -498,6 +806,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-3 Move Operator @@ -509,9 +832,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 - Stage: Stage-11 + Stage: Stage-14 Stats-Aggr Operator + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-4 Move Operator tables: @@ -522,9 +867,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 - Stage: Stage-12 + Stage: Stage-16 Stats-Aggr Operator + Stage: Stage-17 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), 
_col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index 012b211..5151068 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -28,8 +28,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -71,6 +75,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +140,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -116,9 +186,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT src.key) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1) INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1) diff --git a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out index e41d9ef..c9b6cec 100644 --- a/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/groupby_multi_single_reducer3.q.out @@ -40,8 +40,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 
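Note: every group-by golden file in this patch changes the same way. Each INSERT sink gains a Select Operator that maps the insert expressions onto the target column names, a hash-mode Group By over compute_stats(col, 16), a follow-up map-reduce stage that merges the partial sketches (mode: mergepartial), and a terminal Column Stats Work stage that publishes the per-column statistics for the target table. A minimal sketch of the kind of query that yields this plan shape, reusing the dest_g2/dest_g3 query from the hunk above and assuming the feature is toggled by hive.stats.column.autogather (the flag itself never appears in the diff):

    -- assumed config name; enables insert-time column stats collection
    SET hive.stats.column.autogather=true;
    EXPLAIN
    FROM src
    INSERT OVERWRITE TABLE dest_g2
      SELECT substr(src.key,1,1), count(DISTINCT src.key)
      WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
    INSERT OVERWRITE TABLE dest_g3
      SELECT substr(src.key,1,1), count(DISTINCT src.key), count(src.value)
      WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1);

With the flag on, the EXPLAIN should show the extra Column Stats Work stages (Stage-7 and Stage-8 above); with it off, each plan ends at the Stats-Aggr Operator as in the old golden output.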
Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -83,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -206,8 +298,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -249,6 +345,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -270,6 +381,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -284,6 +410,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + 
Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -294,9 +456,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) @@ -372,8 +556,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -415,6 +603,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') 
and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -436,6 +639,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -450,6 +668,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -460,9 +714,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select key, count(*) @@ -538,8 +814,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends 
on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -581,6 +861,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -602,6 +897,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -616,6 +926,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -626,9 +972,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + 
TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from src insert overwrite table e1 select value, count(*) diff --git a/ql/src/test/results/clientpositive/groupby_position.q.out b/ql/src/test/results/clientpositive/groupby_position.q.out index 3ded69c..109abc1 100644 --- a/ql/src/test/results/clientpositive/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/groupby_position.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -87,6 +91,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -101,11 +120,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + 
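Note: the auto-generated stats stages do the same work as an explicit ANALYZE statement; the new plans simply piggyback it on the INSERT. A rough manual counterpart for one of the tables in this file, in standard HiveQL (table and column names taken from the diff):

    -- manual equivalent of the generated compute_stats stages
    ANALYZE TABLE testtable1 COMPUTE STATISTICS FOR COLUMNS;
    -- read back the published statistics for a single column
    DESCRIBE FORMATTED testtable1 key;

The second argument of compute_stats(col, 16) is the number of bit vectors used by the distinct-value estimator, so in the gathered stats the NDV is an estimate while the other counters (null count, min/max, lengths) are exact.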
input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -129,6 +184,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -140,9 +210,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1, 2 @@ -218,9 +310,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -277,6 +373,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -291,11 +402,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) @@ -319,6 +466,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -330,9 +492,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM SRC INSERT OVERWRITE TABLE testTable1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 1 INSERT OVERWRITE TABLE testTable2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) WHERE SRC.key < 20 GROUP BY 2, 1 diff --git a/ql/src/test/results/clientpositive/groupby_ppr.q.out b/ql/src/test/results/clientpositive/groupby_ppr.q.out index 8a75c2e..e85e2d7 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns 
key,c1,c2 + columns.types string,int,string + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out index 68d91eb..8737447 100644 --- a/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,6 +193,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -224,6 +248,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) @@ -285,6 +386,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -431,7 +534,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -452,6 +555,28 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), 
_col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -462,7 +587,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -485,6 +610,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns key,c1,c2,c3,c4 + columns.types string,int,string,int,int + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column 
stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/groupby_rollup1.q.out index 5fd011e..b78d062 100644 --- a/ql/src/test/results/clientpositive/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/groupby_rollup1.q.out @@ -397,10 +397,14 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 - Stage-5 depends on stages: Stage-2 - Stage-6 depends on stages: Stage-5 - Stage-1 depends on stages: Stage-6 - Stage-7 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-11 depends on stages: Stage-4, Stage-5, Stage-8, Stage-9 + Stage-5 depends on stages: Stage-3 + Stage-6 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-9 depends on stages: Stage-7 STAGE PLANS: Stage: Stage-2 @@ -485,6 +489,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -499,11 +518,47 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 
(type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) sort order: +++ Map-reduce partition columns: rand() (type: double) @@ -523,7 +578,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -553,6 +608,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -564,9 +634,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 - Stage: Stage-7 + Stage: Stage-8 Stats-Aggr Operator + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T1 INSERT OVERWRITE TABLE T2 SELECT key, val, count(1) group by key, val with rollup INSERT OVERWRITE TABLE T3 SELECT key, val, sum(1) group by key, val with rollup diff --git a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out index 92babd7..a891c69 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out +++ 
b/ql/src/test/results/clientpositive/groupby_sort_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -428,6 +482,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -555,6 +611,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -588,6 +671,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -629,6 +789,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -666,7 +827,7 @@ STAGE PLANS: 
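Note: alongside the new stages, these golden outputs flip the COLUMN_STATS_ACCURATE table property from {"BASIC_STATS":"true"} to a form that also carries a per-column accuracy map, e.g. {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}; once the insert-time stats stages have run, the metastore records exactly which columns have trustworthy statistics. The property can be inspected with standard HiveQL (property and table names taken from the diff):

    -- list all table properties, including COLUMN_STATS_ACCURATE
    SHOW TBLPROPERTIES outputtbl1;
    -- or fetch just the one property
    SHOW TBLPROPERTIES outputtbl1("COLUMN_STATS_ACCURATE");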
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -687,6 +848,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -742,6 +919,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -761,7 +967,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -784,6 +990,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -798,7 +1012,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -828,7 +1042,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -849,7 +1063,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -885,7 +1099,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -915,7 +1129,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -936,7 +1150,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1003,6 +1217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1040,7 +1255,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1061,6 +1276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1116,6 +1347,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data 
size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1135,7 +1395,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1158,6 +1418,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1172,7 +1440,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1202,7 +1470,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1223,7 +1491,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1259,7 +1527,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1289,7 +1557,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1310,7 +1578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ 
-1385,6 +1653,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1443,6 +1712,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1498,6 +1783,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1540,6 +1854,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1766,6 +2088,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1893,6 +2217,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1926,38 +2277,115 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 -8 1 18 1 -8 1 28 1 -PREHOOK: query: EXPLAIN EXTENDED -INSERT OVERWRITE TABLE outputTbl3 -SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 -PREHOOK: type: QUERY + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 +8 1 18 1 +8 1 28 1 +PREHOOK: query: EXPLAIN EXTENDED +INSERT OVERWRITE TABLE outputTbl3 +SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 +PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 @@ -1966,6 +2394,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2072,7 +2502,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2093,6 +2523,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column 
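The pattern repeated throughout these updated plans — a new "Column Stats Work" stage fed by a Select Operator over the sink columns, a map-side compute_stats(..., 16) hash aggregation, and a reduce-side mergepartial — is the shape Hive gives an INSERT when it gathers column statistics as part of the write. A minimal HiveQL sketch of the setup these golden files appear to exercise; the session setting hive.stats.column.autogather is an assumption about the test configuration, since the .q files themselves are not part of this diff:

  -- Assumed: column-stats autogather enabled for the session.
  SET hive.stats.column.autogather=true;
  -- Any INSERT then plans the extra stats stages seen above, e.g.:
  EXPLAIN EXTENDED
  INSERT OVERWRITE TABLE outputTbl4
  SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val;
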
@@ -1966,6 +2394,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2072,7 +2502,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,cnt
@@ -2093,6 +2523,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+            outputColumnNames: key1, key2, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1,_col2
+                      columns.types struct,struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -2103,7 +2560,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,cnt
@@ -2126,6 +2583,83 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, cnt
+          Column Types: int, int, int
+          Table: default.outputtbl3
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1,_col2
+              columns.types struct,struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1,_col2
+                columns.types struct,struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2
+                  columns.types struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3
 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1
 PREHOOK: type: QUERY
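Once such a plan runs, the per-column flags added to COLUMN_STATS_ACCURATE above can be inspected with stock HiveQL; a sketch, with table and column names taken from this file:

  SHOW TBLPROPERTIES outputTbl3;               -- COLUMN_STATS_ACCURATE now lists cnt, key1, key2
  DESCRIBE FORMATTED default.outputTbl3 key1;  -- prints the gathered statistics for one column
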
@@ -2168,6 +2702,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2284,7 +2820,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2305,6 +2841,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -2315,7 +2878,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2338,6 +2901,83 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT key + key, sum(cnt) from
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
@@ -2389,6 +3029,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -2428,7 +3069,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2449,6 +3090,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
           TableScan
             alias: t1
             Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
@@ -2480,7 +3137,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2501,6 +3158,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -2556,6 +3229,35 @@ STAGE PLANS:
             name: default.t1
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -2575,7 +3277,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2598,6 +3300,14 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
@@ -2612,7 +3322,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2642,7 +3352,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2663,7 +3373,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2699,7 +3409,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2729,7 +3439,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2750,7 +3460,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -2839,6 +3549,7 @@ STAGE DEPENDENCIES:
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-3 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -2990,7 +3701,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3011,6 +3722,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
           TableScan
             GatherStats: false
             Union
@@ -3030,7 +3757,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3051,6 +3778,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -3129,6 +3872,35 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1]
 #### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-8
     Conditional Operator
@@ -3148,7 +3920,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3171,6 +3943,14 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
@@ -3185,7 +3965,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3215,7 +3995,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3236,7 +4016,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3272,7 +4052,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3302,7 +4082,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3323,7 +4103,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3409,6 +4189,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -3548,7 +4330,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3569,6 +4351,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -3579,7 +4388,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -3602,6 +4411,83 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
+
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT subq1.key, subq1.cnt+subq2.cnt FROM
 (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1
@@ -3957,6 +4843,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -3995,7 +4883,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4018,7 +4906,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4064,7 +4952,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -4085,6 +4973,33 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int)
+            outputColumnNames: key, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    properties:
+                      column.name.delimiter ,
+                      columns _col0,_col1
+                      columns.types struct,struct
+                      escape.delim \
+                      serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -4095,7 +5010,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key,cnt
@@ -4111,12 +5026,89 @@ STAGE PLANS:
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               totalSize 20
 #### A masked pattern was here ####
-            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-            name: default.outputtbl1
-
-  Stage: Stage-2
-    Stats-Aggr Operator
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            name: default.outputtbl1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+#### A masked pattern was here ####
+
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: int, int
+          Table: default.outputtbl1
+          Is Table Level Stats: true
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            GatherStats: false
+            Reduce Output Operator
+              null sort order: 
+              sort order: 
+              Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+              tag: -1
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+              auto parallelism: false
+      Path -> Alias:
+#### A masked pattern was here ####
+      Path -> Partition:
+#### A masked pattern was here ####
+          Partition
+            base file name: -mr-10002
+            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+            properties:
+              column.name.delimiter ,
+              columns _col0,_col1
+              columns.types struct,struct
+              escape.delim \
+              serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+            serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+              properties:
+                column.name.delimiter ,
+                columns _col0,_col1
+                columns.types struct,struct
+                escape.delim \
+                serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Truncated Path -> Alias:
+#### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1
+                  columns.types struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
 PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1
 SELECT key, count(1) FROM T2 GROUP BY key
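The reduce-side mergepartial stage that keeps reappearing here is the same work that ANALYZE performs; with autogather off, an equivalent manual sequence would be the following sketch (not taken from the test itself, and again assuming the hive.stats.column.autogather switch):

  SET hive.stats.column.autogather=false;  -- assumed switch; suppresses the extra stages
  INSERT OVERWRITE TABLE outputTbl1
  SELECT key, count(1) FROM T2 GROUP BY key;
  ANALYZE TABLE outputTbl1 COMPUTE STATISTICS FOR COLUMNS;  -- runs the compute_stats plan on its own
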
@@ -4157,6 +5149,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -4194,7 +5187,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4215,6 +5208,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -4224,7 +5233,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4247,7 +5256,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4270,6 +5279,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [$hdt$_0:t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3
+                  columns.types struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -4289,7 +5327,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4312,6 +5350,14 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, key3, cnt
+          Column Types: int, int, string, int
+          Table: default.outputtbl4
+          Is Table Level Stats: true
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
@@ -4326,7 +5372,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4356,7 +5402,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4377,7 +5423,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4413,7 +5459,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4443,7 +5489,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4464,7 +5510,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4542,6 +5588,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -4600,6 +5647,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+            outputColumnNames: key1, key2, key3, key4, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4
+              Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -4609,7 +5672,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4632,7 +5695,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
               SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -4655,6 +5718,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [$hdt$_0:t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+#### A masked pattern was here ####
+            NumFilesPerFileSink: 1
+            Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                properties:
+                  columns _col0,_col1,_col2,_col3,_col4
+                  columns.types struct:struct:struct:struct:struct
+                  escape.delim \
+                  hive.serialization.extend.additional.nesting.levels true
+                  serialization.escape.crlf true
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            TotalFiles: 1
+            GatherStats: false
+            MultiFileSpray: false
 
   Stage: Stage-7
     Conditional Operator
@@ -4697,6 +5789,14 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, key3, key4, cnt
+          Column Types: int, int, string, int, int
+          Table: default.outputtbl5
+          Is Table Level Stats: true
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
@@ -4926,6 +6026,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -4963,7 +6064,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}}
               bucket_count -1
               column.name.delimiter ,
               columns key1,key2,key3,cnt
@@ -4984,6 +6085,22 @@ STAGE PLANS:
             TotalFiles: 1
             GatherStats: true
             MultiFileSpray: false
+          Select Operator
+            expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int)
+            outputColumnNames: key1, key2, key3, cnt
+            Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16)
+              mode: hash
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+              Reduce Output Operator
+                null sort order: 
+                sort order: 
+                Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                tag: -1
+                value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -4993,7 +6110,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5016,7 +6133,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
               bucket_count 2
               bucket_field_name key
@@ -5039,6 +6156,35 @@ STAGE PLANS:
             name: default.t2
       Truncated Path -> Alias:
         /t2 [$hdt$_0:t2]
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5058,7 +6204,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5081,6 +6227,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5095,7 +6249,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5125,7 +6279,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5146,7 +6300,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5182,7 +6336,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5212,7 +6366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5233,7 +6387,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5317,6 +6471,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5354,7 +6509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5375,6 +6530,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5384,7 +6555,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5407,7 +6578,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5430,6 +6601,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5449,7 +6649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5472,6 +6672,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5486,7 +6694,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5516,7 +6724,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5537,7 +6745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5573,7 +6781,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5603,7 +6811,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , 
columns key1,key2,key3,cnt @@ -5624,7 +6832,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5720,8 +6928,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5769,6 +6981,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5788,6 +7015,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5802,6 +7044,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5812,9 +7090,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -5875,8 +7175,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -5927,6 +7231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -5946,6 +7265,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -5960,6 +7294,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -5970,9 +7340,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_2.q.out b/ql/src/test/results/clientpositive/groupby_sort_2.q.out index de6bf14..e49e956 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_2.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_2.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: val, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT val, count(1) FROM T1 GROUP BY val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_3.q.out b/ql/src/test/results/clientpositive/groupby_sort_3.q.out index da1db8c..d295a05 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_3.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_3.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -183,6 +217,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -216,6 +251,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -239,6 +300,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_4.q.out b/ql/src/test/results/clientpositive/groupby_sort_4.q.out index ae2ae66..bed68f6 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_4.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_4.q.out @@ -46,6 +46,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -90,6 +92,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,6 +121,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -149,6 +195,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -192,6 +240,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -206,6 +269,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_5.q.out b/ql/src/test/results/clientpositive/groupby_sort_5.q.out index 40b9769..4b0e491 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_5.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_5.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -211,6 +245,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -244,6 +279,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: 
+ Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -267,6 +328,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -380,6 +448,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -424,6 +494,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -438,6 +523,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.outputtbl2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_6.q.out b/ql/src/test/results/clientpositive/groupby_sort_6.q.out index 80baa66..375f736 100644 --- 
a/ql/src/test/results/clientpositive/groupby_sort_6.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_6.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -102,6 +104,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -135,6 +164,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -176,6 +282,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -231,7 +339,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -252,6 +360,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -262,7 +397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -285,6 +420,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '1' GROUP BY key PREHOOK: type: QUERY @@ -317,6 +529,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -419,7 +633,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -440,6 +654,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -450,7 +691,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -473,6 +714,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 where ds = '2' GROUP BY key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/groupby_sort_7.q.out b/ql/src/test/results/clientpositive/groupby_sort_7.q.out index 21b0a37..e7c9a95 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_7.q.out +++ 
b/ql/src/test/results/clientpositive/groupby_sort_7.q.out @@ -51,6 +51,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -84,6 +85,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -107,6 +134,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: string, string, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out index 6e09753..b22feb8 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_skew_1_23.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -106,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +178,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -203,6 +249,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -429,6 +483,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -624,6 +680,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -657,6 +740,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -698,6 +858,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -735,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -756,6 +917,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -811,6 +988,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 
[$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -830,7 +1036,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -853,6 +1059,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -867,7 +1081,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -897,7 +1111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -918,7 +1132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -954,7 +1168,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -984,7 +1198,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} 
bucket_count -1 column.name.delimiter , columns key,cnt @@ -1005,7 +1219,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1072,6 +1286,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1109,7 +1324,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1130,6 +1345,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1185,6 +1416,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1204,7 +1464,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1227,6 +1487,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: 
int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1241,7 +1509,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1271,7 +1539,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1292,7 +1560,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1328,7 +1596,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1358,7 +1626,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1379,7 +1647,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1454,6 +1722,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1512,6 +1781,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1567,6 +1852,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [$hdt$_0:t1] + Needs Tagging: false + 
Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1609,6 +1923,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1836,6 +2158,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2031,6 +2355,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2064,32 +2415,109 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### -PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Output: default@outputtbl4 -POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 -SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Output: default@outputtbl4 -POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] -POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION 
[(t1)t1.FieldSchema(name:key, type:string, comment:null), ] -POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] -POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] -PREHOOK: query: SELECT * FROM outputTbl4 -PREHOOK: type: QUERY -PREHOOK: Input: default@outputtbl4 + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: #### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM outputTbl4 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@outputtbl4 + Path -> Partition: #### A masked pattern was here #### -1 1 11 1 -2 1 12 1 -3 1 13 1 -7 1 17 1 + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + +PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@outputtbl4 +POSTHOOK: query: INSERT OVERWRITE TABLE outputTbl4 +SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@t1 +POSTHOOK: Output: default@outputtbl4 +POSTHOOK: Lineage: outputtbl4.cnt EXPRESSION [(t1)t1.null, ] +POSTHOOK: Lineage: outputtbl4.key1 EXPRESSION [(t1)t1.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: outputtbl4.key2 SIMPLE [] +POSTHOOK: Lineage: outputtbl4.key3 SIMPLE [(t1)t1.FieldSchema(name:val, type:string, comment:null), ] +PREHOOK: query: SELECT * FROM outputTbl4 +PREHOOK: type: QUERY +PREHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM outputTbl4 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@outputtbl4 +#### A masked pattern was here #### +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 8 1 18 1 8 1 28 1 PREHOOK: query: EXPLAIN EXTENDED @@ -2105,6 +2533,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2279,7 +2709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2300,6 +2730,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2310,7 +2767,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -2333,6 +2790,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto 
parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -2376,6 +2910,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2560,7 +3096,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2581,6 +3117,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2591,7 +3154,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2614,6 +3177,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), 
sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2665,6 +3305,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2704,7 +3345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2725,6 +3366,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan alias: t1 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE @@ -2756,7 +3413,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2777,6 +3434,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2832,6 +3505,35 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1, null-subquery2:$hdt$_0-subquery2:$hdt$_0:t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + 
escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2851,7 +3553,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2874,6 +3576,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2888,7 +3598,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2918,7 +3628,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2939,7 +3649,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2975,7 +3685,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3005,7 +3715,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3026,7 +3736,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3116,6 +3826,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -3335,7 +4046,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3356,6 +4067,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false TableScan GatherStats: false Union @@ -3375,7 +4102,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3396,6 +4123,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3474,6 +4217,35 @@ STAGE PLANS: Truncated Path -> Alias: /t1 [null-subquery1:$hdt$_0-subquery1:$hdt$_0:t1] #### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3493,7 +4265,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3516,6 +4288,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3530,7 +4310,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3560,7 +4340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3581,7 +4361,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3617,7 +4397,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3647,7 +4427,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3668,7 +4448,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3754,6 +4534,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -3893,7 +4675,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3914,6 +4696,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select 
Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3924,7 +4733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3947,6 +4756,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -4372,6 +5258,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -4410,7 +5298,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4433,7 +5321,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4547,7 +5435,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4568,6 +5456,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -4578,7 +5493,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -4594,12 +5509,89 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - - Stage: Stage-3 - Stats-Aggr Operator + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + + Stage: Stage-3 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key @@ -4640,6 +5632,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4677,7 +5670,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} 
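The pattern repeated throughout these hunks is Hive's automatic column-statistics gathering on INSERT: each rewritten plan grows a Select Operator/Group By Operator branch that feeds the internal compute_stats UDAF, a follow-up reducer that merges the partial results (mode: mergepartial or final), and a terminal Column Stats Work stage that persists them, which is also why COLUMN_STATS_ACCURATE now carries a COLUMN_STATS map naming every covered column. A minimal HiveQL sketch of what drives this, assuming the hive.stats.column.autogather session flag (the config that enables the behavior; the golden files only show its effect):

  -- Assumed session flag; when set, every INSERT schedules the extra
  -- compute_stats stages seen in the plans above and below.
  SET hive.stats.column.autogather=true;
  EXPLAIN
  INSERT OVERWRITE TABLE outputTbl1
  SELECT key, count(1) FROM T2 GROUP BY key;
  -- Without autogather, equivalent stats can still be collected explicitly:
  ANALYZE TABLE outputTbl1 COMPUTE STATISTICS FOR COLUMNS;
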
bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4698,6 +5691,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4707,7 +5716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4730,7 +5739,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -4753,6 +5762,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4772,7 +5810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4795,6 +5833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here 
#### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4809,7 +5855,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4839,7 +5885,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4860,7 +5906,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4896,7 +5942,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4926,7 +5972,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -4947,7 +5993,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5025,6 +6071,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5083,6 +6130,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5092,7 +6155,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5115,7 +6178,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5138,6 +6201,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5180,6 +6272,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5409,6 +6509,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5446,7 +6547,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5467,6 +6568,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + 
Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5476,7 +6593,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5499,7 +6616,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5522,6 +6639,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5541,7 +6687,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5564,6 +6710,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: 
default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5578,7 +6732,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5608,7 +6762,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5629,7 +6783,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5665,7 +6819,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5695,7 +6849,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5716,7 +6870,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5800,6 +6954,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -5837,7 +6992,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5858,6 +7013,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -5867,7 +7038,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5890,7 +7061,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -5913,6 +7084,35 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [$hdt$_0:t2] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -5932,7 +7132,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5955,6 +7155,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -5969,7 +7177,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -5999,7 +7207,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6020,7 +7228,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6056,7 +7264,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6086,7 +7294,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6107,7 +7315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -6204,8 +7412,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6253,6 +7465,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6296,6 +7523,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6310,6 +7552,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6320,9 +7598,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM T2 INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val @@ -6384,8 +7684,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: 
Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-2 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -6436,6 +7740,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -6479,6 +7798,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -6493,6 +7827,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -6503,9 +7873,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, val from T2 where key = 8) x INSERT OVERWRITE TABLE DEST1 SELECT key, count(1) GROUP BY key INSERT OVERWRITE TABLE DEST2 SELECT key, val, count(1) GROUP BY key, val diff --git a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out index 5b94c0e..fcf6a08 100644 --- a/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out +++ b/ql/src/test/results/clientpositive/groupby_sort_test_1.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -81,6 +82,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -104,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out index c4b9dc4..be9f24a 100644 --- 
a/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out +++ b/ql/src/test/results/clientpositive/implicit_cast_during_insert.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -56,6 +57,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.implicit_cast_during_insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: c1, c2, p1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + keys: p1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -72,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.implicit_cast_during_insert + PREHOOK: query: insert overwrite table implicit_cast_during_insert partition (p1) select key, value, key key1 from (select * from src where key in (0,1)) q distribute by key1 sort by key1 diff --git a/ql/src/test/results/clientpositive/index_auto_update.q.out b/ql/src/test/results/clientpositive/index_auto_update.q.out index e7bc069..6f90f5d 100644 --- a/ql/src/test/results/clientpositive/index_auto_update.q.out +++ b/ql/src/test/results/clientpositive/index_auto_update.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-0 Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-13 depends on stages: Stage-3, Stage-4, Stage-5, Stage-7 Stage-4 depends on stages: Stage-2 Stage-5 depends on stages: Stage-2 Stage-7 depends on stages: Stage-0 @@ -72,6 +73,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, val + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-12 Conditional Operator @@ -137,6 +164,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: string, string + Table: default.temp + Stage: Stage-4 Stage: Stage-5 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out index 1aea388..9ac873e 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -193,7 +193,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -244,7 +244,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 4970 @@ -295,7 +295,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -346,7 +346,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 2654 rawDataSize 28466 @@ -397,7 +397,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -448,7 +448,7 @@ Database: default Table: test_table #### A masked pattern was here #### 
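Each Partition Parameters block below records the same upgrade: BASIC_STATS-only accuracy becomes per-column accuracy for key and value. A quick way to confirm this state on a live warehouse is to describe the partition; test_table and part='1' are the names these tests use, and the expected parameter value is copied from the output below:

  DESCRIBE FORMATTED test_table PARTITION (part = '1');
  -- Partition Parameters should include:
  --   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
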
Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -499,7 +499,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -550,7 +550,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -601,7 +601,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -652,7 +652,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -703,7 +703,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -754,7 +754,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -805,7 +805,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -856,7 +856,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -907,7 +907,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 6 rawDataSize 18 @@ -958,7 +958,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 2964 @@ -1009,7 +1009,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 5 rawDataSize 19 @@ -1060,7 +1060,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1111,7 +1111,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1162,7 +1162,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1213,7 +1213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -1264,7 +1264,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 3582 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out index 52ebe5a..2e996b9 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_convert_join.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -62,8 +62,6 @@ SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@test_table@part=1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask POSTHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (part = '1') SELECT a.key, b.value FROM src a JOIN src b ON a.key = b.key POSTHOOK: type: QUERY @@ -93,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -105,8 +103,8 @@ SerDe Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Compressed: No -Num Buckets: 1 -Bucket Columns: [key] -Sort Columns: [Order(col:key, order:1)] +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] Storage Desc Params: serialization.format 1 
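The next file, infer_bucket_sort_dyn_part.q.out, shows the dynamic-partition variant of the same rewrite: its stats-gathering Group By Operator carries keys: ds (type: string), hr (type: string), so compute_stats aggregates once per emitted partition, and the follow-up reducer's Select Operator reorders the output so the partition keys come last. A trimmed HiveQL sketch of the statement shape involved (the full test query, including its subquery, appears verbatim in the output below; the two SET lines are the usual dynamic-partitioning prerequisites, not something these golden files display):

  SET hive.exec.dynamic.partition=true;
  SET hive.exec.dynamic.partition.mode=nonstrict;
  -- hr is resolved per row, so column stats are grouped by (ds, hr):
  INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr)
  SELECT key, value, IF(key % 100 == 0, '11', '12') FROM srcpart;
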
diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index d62d0b8..d238545 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -58,7 +58,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -98,7 +98,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 4812 @@ -161,7 +161,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -201,7 +201,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1173 @@ -266,7 +266,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 155 rawDataSize 586 @@ -306,7 +306,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 154 rawDataSize 591 @@ -428,9 +428,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -474,6 +476,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.test_table + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -500,6 +518,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr 
Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -520,6 +545,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 2906 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key, value, IF (key % 100 == 0, '11', '12') FROM (SELECT key, COUNT(*) AS value FROM srcpart @@ -568,7 +622,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 4 rawDataSize 14 @@ -608,7 +662,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 305 rawDataSize 1163 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out index 3e29664..92ae913 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_grouping_operators.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), 
compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +102,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 750 Data size: 7968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 375 Data size: 3984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: SELECT key, value, count(1) FROM src GROUP BY ROLLUP (key, value) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -747,7 +801,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 6309 @@ -1429,7 +1483,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 619 rawDataSize 7547 @@ -1456,6 +1510,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1500,6 +1556,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1516,6 +1588,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value WITH CUBE PREHOOK: type: QUERY @@ -1552,7 +1660,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 9954 @@ -1607,7 +1715,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 928 rawDataSize 11810 @@ -1634,6 +1742,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -1678,6 +1788,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), '1' (type: string) + outputColumnNames: key, value, agg, part + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(agg, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1694,6 +1820,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, agg + Column Types: string, string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, value, count(1) FROM src GROUP BY key, value GROUPING SETS (key, value) PREHOOK: type: QUERY @@ -1730,7 +1892,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 6054 @@ -1785,7 +1947,7 @@ Database: default Table: test_table_out_2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"agg\":\"true\",\"grouping_key\":\"true\",\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 618 rawDataSize 7290 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out index 98a2f5f..c8d1492 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_list_bucket.q.out @@ -46,7 +46,7 @@ Database: default Table: list_bucketing_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -116,7 +116,7 @@ Database: default Table: list_bucketing_table2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out index 32edd73..fab1793 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_map_operators.q.out @@ -58,6 +58,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -91,6 +92,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -116,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -180,7 +222,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -218,6 +260,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 
Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -281,6 +325,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -297,6 +357,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key @@ -372,6 +468,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -403,6 +500,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 
(type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -428,6 +553,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -494,7 +626,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -523,6 +655,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -567,6 +701,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -583,6 +731,38 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key GROUP BY b.value @@ -621,7 +801,7 @@ Database: default Table: 
test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2728 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out index bf77d4c..bf4daf2 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out index 59b20fe..114fcfe 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_multi_insert.q.out @@ -46,7 +46,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -85,7 +85,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -142,7 +142,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -181,7 +181,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2718 @@ -238,7 +238,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -277,7 +277,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -334,7 +334,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 1482 @@ -373,7 +373,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 309 rawDataSize 2690 diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index 0c61fe0..29e29ae 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -56,6 +58,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -73,6 +91,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 58 Data size: 11624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM (SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr @@ -123,7 +177,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -163,7 +217,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git a/ql/src/test/results/clientpositive/innerjoin.q.out b/ql/src/test/results/clientpositive/innerjoin.q.out index 4a7649d..23d3777 100644 --- a/ql/src/test/results/clientpositive/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/innerjoin.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input11.q.out 
b/ql/src/test/results/clientpositive/input11.q.out index bb22ee8..9543c65 100644 --- a/ql/src/test/results/clientpositive/input11.q.out +++ b/ql/src/test/results/clientpositive/input11.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input11_limit.q.out b/ql/src/test/results/clientpositive/input11_limit.q.out index 597554e..4a1b77a 100644 --- a/ql/src/test/results/clientpositive/input11_limit.q.out +++ b/ql/src/test/results/clientpositive/input11_limit.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -39,7 +41,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -61,6 +62,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +91,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input12.q.out b/ql/src/test/results/clientpositive/input12.q.out index 3bb765c..470bbf2 100644 --- a/ql/src/test/results/clientpositive/input12.q.out +++ b/ql/src/test/results/clientpositive/input12.q.out @@ -40,23 +40,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -80,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) 
(type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 200) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +133,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -133,6 +185,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -163,15 +236,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -189,40 +253,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -237,38 +284,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: FROM src 
INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 diff --git a/ql/src/test/results/clientpositive/input13.q.out b/ql/src/test/results/clientpositive/input13.q.out index 91ff234..1bc031d 100644 --- a/ql/src/test/results/clientpositive/input13.q.out +++ b/ql/src/test/results/clientpositive/input13.q.out @@ -42,29 +42,19 @@ STAGE DEPENDENCIES: Stage-7 Stage-0 depends on stages: Stage-7, Stage-6, Stage-9 Stage-5 depends on stages: Stage-0 + Stage-15 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-16 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-11, Stage-12, Stage-13, Stage-14, Stage-3 Stage-6 Stage-8 Stage-9 depends on stages: Stage-8 - Stage-16 depends on stages: Stage-4 , consists of Stage-13, Stage-12, Stage-14 - Stage-13 - Stage-1 depends on stages: Stage-13, Stage-12, Stage-15 + Stage-1 depends on stages: Stage-4 Stage-11 depends on stages: Stage-1 - Stage-12 - Stage-14 - Stage-15 depends on stages: Stage-14 - Stage-22 depends on stages: Stage-4 , consists of Stage-19, Stage-18, Stage-20 - Stage-19 - Stage-2 depends on stages: Stage-19, Stage-18, Stage-21 - Stage-17 depends on stages: Stage-2 - Stage-18 - Stage-20 - Stage-21 depends on stages: Stage-20 - Stage-27 depends on stages: Stage-4 , consists of Stage-24, Stage-23, Stage-25 - Stage-24 - Stage-3 depends on stages: Stage-24, Stage-23, Stage-26 - Stage-23 - Stage-25 - Stage-26 depends on stages: Stage-25 + Stage-12 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-4 + Stage-13 depends on stages: Stage-2 + Stage-14 depends on stages: Stage-4 + Stage-3 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-4 @@ -88,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -103,6 +106,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((key >= 200) and (key < 300)) 
(type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -118,6 +136,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (key >= 300) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -132,6 +166,19 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-10 Conditional Operator @@ -155,6 +202,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -185,15 +253,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-16 - Conditional Operator - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -211,40 +270,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-14 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - - Stage: Stage-15 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-22 - Conditional Operator - - Stage: 
Stage-19 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -259,47 +301,37 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 - Stage: Stage-17 + Stage: Stage-13 Stats-Aggr Operator - Stage: Stage-18 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-20 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest3 - - Stage: Stage-21 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-27 - Conditional Operator - - Stage: Stage-24 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### Stage: Stage-3 Move Operator @@ -307,34 +339,6 @@ STAGE PLANS: hdfs directory: true destination: target/warehouse/dest4.out - Stage: Stage-23 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-25 - Map Reduce - Map Operator Tree: - TableScan - File Output 
Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-26 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.* WHERE src.key < 100 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key >= 100 and src.key < 200 diff --git a/ql/src/test/results/clientpositive/input14.q.out b/ql/src/test/results/clientpositive/input14.q.out index af04a98..e5120fc 100644 --- a/ql/src/test/results/clientpositive/input14.q.out +++ b/ql/src/test/results/clientpositive/input14.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input14_limit.q.out b/ql/src/test/results/clientpositive/input14_limit.q.out index 9870ad5..828a094 100644 --- a/ql/src/test/results/clientpositive/input14_limit.q.out +++ b/ql/src/test/results/clientpositive/input14_limit.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,7 +55,6 @@ STAGE 
PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -79,7 +80,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -104,6 +104,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +133,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/input17.q.out b/ql/src/test/results/clientpositive/input17.q.out index 057a92d..8ecf7f9 100644 --- a/ql/src/test/results/clientpositive/input17.q.out +++ b/ql/src/test/results/clientpositive/input17.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.aint + src_thrift.lint[0], src_thrift.lintstring[0]) diff --git a/ql/src/test/results/clientpositive/input18.q.out b/ql/src/test/results/clientpositive/input18.q.out index b341510..be98cda 100644 --- a/ql/src/test/results/clientpositive/input18.q.out +++ b/ql/src/test/results/clientpositive/input18.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -83,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value, 1+2, 3+4) diff --git a/ql/src/test/results/clientpositive/input1_limit.q.out b/ql/src/test/results/clientpositive/input1_limit.q.out index 0ca1552..4e69b72 100644 --- a/ql/src/test/results/clientpositive/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/input1_limit.q.out @@ -28,9 +28,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -52,7 +56,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Filter Operator predicate: (key < 100) (type: boolean) @@ -90,6 +93,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -104,14 +122,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -133,6 +186,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -144,9 +212,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, src.value WHERE src.key < 100 LIMIT 10 INSERT OVERWRITE TABLE dest2 SELECT src.key, src.value WHERE src.key < 100 LIMIT 5 diff --git a/ql/src/test/results/clientpositive/input20.q.out b/ql/src/test/results/clientpositive/input20.q.out index cf0ee1d..076deaf 100644 --- a/ql/src/test/results/clientpositive/input20.q.out +++ b/ql/src/test/results/clientpositive/input20.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: 
false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input30.q.out b/ql/src/test/results/clientpositive/input30.q.out index 130f22d..a9fb038 100644 --- a/ql/src/test/results/clientpositive/input30.q.out +++ b/ql/src/test/results/clientpositive/input30.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest30 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -78,6 +99,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY @@ -110,6 +138,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -147,6 +176,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest30 + Select Operator + 
expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -161,6 +210,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.dest30 + PREHOOK: query: insert overwrite table dest30 select count(1) from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input31.q.out b/ql/src/test/results/clientpositive/input31.q.out index 264ebe5..0fe569b 100644 --- a/ql/src/test/results/clientpositive/input31.q.out +++ b/ql/src/test/results/clientpositive/input31.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tst_dest31 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -80,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest31 + PREHOOK: query: insert overwrite table dest31 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input32.q.out b/ql/src/test/results/clientpositive/input32.q.out index c8fdfd4..97a2f22 100644 --- a/ql/src/test/results/clientpositive/input32.q.out +++ b/ql/src/test/results/clientpositive/input32.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.tst_dest32 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: a + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -77,6 +98,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: a + Column Types: int + Table: default.tst_dest32 + PREHOOK: query: insert overwrite table dest32 select count(1) from srcbucket PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input33.q.out b/ql/src/test/results/clientpositive/input33.q.out index b35e2d0..9d6fcc0 100644 --- a/ql/src/test/results/clientpositive/input33.q.out +++ b/ql/src/test/results/clientpositive/input33.q.out @@ -36,6 +36,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -85,6 +87,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -99,6 +116,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src MAP src.key, src.key diff --git a/ql/src/test/results/clientpositive/input34.q.out b/ql/src/test/results/clientpositive/input34.q.out index 72f66c3..201da7e 100644 --- a/ql/src/test/results/clientpositive/input34.q.out +++ b/ql/src/test/results/clientpositive/input34.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input35.q.out b/ql/src/test/results/clientpositive/input35.q.out index 8b86991..7509d14 100644 --- a/ql/src/test/results/clientpositive/input35.q.out +++ b/ql/src/test/results/clientpositive/input35.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input36.q.out b/ql/src/test/results/clientpositive/input36.q.out index 76921ad..d018fbe 100644 --- a/ql/src/test/results/clientpositive/input36.q.out +++ b/ql/src/test/results/clientpositive/input36.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input38.q.out b/ql/src/test/results/clientpositive/input38.q.out index 0c4e81d..36efd44 100644 --- a/ql/src/test/results/clientpositive/input38.q.out +++ b/ql/src/test/results/clientpositive/input38.q.out @@ -28,6 +28,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -58,6 +59,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -81,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input3_limit.q.out b/ql/src/test/results/clientpositive/input3_limit.q.out index c7bc4ac..19e5b79 100644 --- a/ql/src/test/results/clientpositive/input3_limit.q.out +++ b/ql/src/test/results/clientpositive/input3_limit.q.out @@ -41,6 +41,8 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 58 Data size: 11603 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -81,7 +82,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -99,6 +99,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 4000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -113,6 
+128,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.t2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE T2 SELECT * FROM (SELECT * FROM T1 DISTRIBUTE BY key SORT BY key, value) T LIMIT 20 PREHOOK: type: QUERY PREHOOK: Input: default@t1 diff --git a/ql/src/test/results/clientpositive/input5.q.out b/ql/src/test/results/clientpositive/input5.q.out index a399528..c137f8b 100644 --- a/ql/src/test/results/clientpositive/input5.q.out +++ b/ql/src/test/results/clientpositive/input5.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +97,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.lint, src_thrift.lintstring) diff --git a/ql/src/test/results/clientpositive/input6.q.out b/ql/src/test/results/clientpositive/input6.q.out index 3d1a815..cf007cf 100644 --- a/ql/src/test/results/clientpositive/input6.q.out +++ b/ql/src/test/results/clientpositive/input6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input7.q.out b/ql/src/test/results/clientpositive/input7.q.out index 0545b1f..380382f 100644 --- a/ql/src/test/results/clientpositive/input7.q.out +++ b/ql/src/test/results/clientpositive/input7.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: double, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input8.q.out b/ql/src/test/results/clientpositive/input8.q.out index d76fc2b..141ca38 100644 --- a/ql/src/test/results/clientpositive/input8.q.out +++ b/ql/src/test/results/clientpositive/input8.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input9.q.out b/ql/src/test/results/clientpositive/input9.q.out index af752e0..a4f1594 100644 --- a/ql/src/test/results/clientpositive/input9.q.out +++ b/ql/src/test/results/clientpositive/input9.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, key + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(key, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: value, key + Column Types: string, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part1.q.out b/ql/src/test/results/clientpositive/input_part1.q.out index 2f6f302..00b294e 100644 --- a/ql/src/test/results/clientpositive/input_part1.q.out +++ b/ql/src/test/results/clientpositive/input_part1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -125,6 +142,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -167,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_part10.q.out b/ql/src/test/results/clientpositive/input_part10.q.out index 4b552ba..53f211e 100644 --- a/ql/src/test/results/clientpositive/input_part10.q.out +++ b/ql/src/test/results/clientpositive/input_part10.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: int) Reduce Operator Tree: Select Operator @@ -70,6 +71,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_special + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008 04 08' (type: string), '10:11:12=455' (type: string) + outputColumnNames: a, b, ds, ts + Statistics: Num rows: 1 Data size: 558 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + keys: ds (type: string), ts (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -87,6 +104,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: string, string + Table: default.part_special + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1174 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1150 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE part_special PARTITION(ds='2008 04 08', ts = '10:11:12=455') SELECT 1, 2 FROM src LIMIT 1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/input_part2.q.out b/ql/src/test/results/clientpositive/input_part2.q.out index 95f36ab..866636f 100644 --- a/ql/src/test/results/clientpositive/input_part2.q.out +++ b/ql/src/test/results/clientpositive/input_part2.q.out @@ -30,16 +30,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -89,6 +87,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -129,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -231,6 +272,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -273,6 +343,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -453,15 +539,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -499,180 +576,69 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - 
serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 
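Each destination in these hunks picks up the same three-part pattern: a Select Operator projecting the inserted columns, a hash-mode Group By Operator running the compute_stats UDAF as a map-side partial aggregate, and a mergepartial Group By Operator whose output feeds the new Column Stats Work stage. These additions match Hive's automatic column-stats gathering; the sketch below assumes that behavior and shows the long-standing manual counterpart for the dest2 table touched in this hunk (the second argument to compute_stats, 16 throughout these plans, is the bit-vector budget for the distinct-value estimator):

-- Manual counterpart of the generated stats stages: ANALYZE plans an
-- equivalent compute_stats aggregation plus a Column Stats Work stage,
-- just decoupled from the INSERT itself.
ANALYZE TABLE dest2 COMPUTE STATISTICS FOR COLUMNS;

-- The UDAF can also be invoked directly; 16 matches the bit-vector
-- count passed by the generated plans above.
SELECT compute_stats(key, 16), compute_stats(value, 16) FROM dest2;
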
+ Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value,hr,ds - columns.comments - columns.types int:string:string:string -#### A masked pattern was here #### - name default.dest2 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct dest2 { i32 key, string value, string hr, string ds} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 - name: default.dest2 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + MultiFileSpray: false PREHOOK: query: FROM srcpart INSERT OVERWRITE TABLE dest1 SELECT srcpart.key, srcpart.value, srcpart.hr, srcpart.ds WHERE srcpart.key < 100 and srcpart.ds 
= '2008-04-08' and srcpart.hr = '12' diff --git a/ql/src/test/results/clientpositive/input_part5.q.out b/ql/src/test/results/clientpositive/input_part5.q.out index c6ae2fd..42e1ea5 100644 --- a/ql/src/test/results/clientpositive/input_part5.q.out +++ b/ql/src/test/results/clientpositive/input_part5.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testsequencefile.q.out b/ql/src/test/results/clientpositive/input_testsequencefile.q.out index 60aaf83..b3f3def 100644 --- a/ql/src/test/results/clientpositive/input_testsequencefile.q.out +++ b/ql/src/test/results/clientpositive/input_testsequencefile.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4_sequencefile + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
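The unpartitioned targets here (tmptable, dest4_sequencefile) show the minimal form of the rewrite: a single new stage that persists table-level column stats after the existing Stats-Aggr stage. A sketch of how such a plan can be reproduced and its effect checked, assuming the hive.stats.column.autogather switch is what enables this rewrite:

SET hive.stats.column.autogather=true;

-- The INSERT exercised by input_testsequencefile.q; EXPLAIN should show
-- the extra compute_stats operators and the Column Stats Work stage.
EXPLAIN
FROM src
INSERT OVERWRITE TABLE dest4_sequencefile SELECT src.key, src.value;

-- Once the insert has run, the gathered statistics are visible per column:
DESCRIBE FORMATTED dest4_sequencefile key;
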
Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest4_sequencefile + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath.q.out b/ql/src/test/results/clientpositive/input_testxpath.q.out index e07628a..b81b56c 100644 --- a/ql/src/test/results/clientpositive/input_testxpath.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, mapvalue + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(mapvalue, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, mapvalue + Column Types: int, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/input_testxpath2.q.out b/ql/src/test/results/clientpositive/input_testxpath2.q.out index a0baccf..373fcb0 100644 --- 
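Note that the stats operators always see the destination column names, even when the inserted values are computed expressions: in the input_testxpath2 hunk that follows, compute_stats runs over lint_size, lintstring_size, and mstringstring_size although the query projects size() calls over complex columns. A hedged sketch of that shape, assuming the stock src_thrift test table and autogather enabled:

SET hive.stats.column.autogather=true;

-- Derived values are profiled under the target table's column names
-- (lint_size, lintstring_size, mstringstring_size in the plan below).
EXPLAIN
INSERT OVERWRITE TABLE dest1
SELECT size(src_thrift.lint), size(src_thrift.lintstring), size(src_thrift.mstringstring)
FROM src_thrift;
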
a/ql/src/test/results/clientpositive/input_testxpath2.q.out +++ b/ql/src/test/results/clientpositive/input_testxpath2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: lint_size, lintstring_size, mstringstring_size + Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(lint_size, 16), compute_stats(lintstring_size, 16), compute_stats(mstringstring_size, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -69,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: lint_size, lintstring_size, mstringstring_size + Column Types: int, int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert1.q.out b/ql/src/test/results/clientpositive/insert1.q.out index 3952578..685bfe2 100644 --- a/ql/src/test/results/clientpositive/insert1.q.out +++ b/ql/src/test/results/clientpositive/insert1.q.out @@ -34,6 +34,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -60,6 +61,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -123,6 +157,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +184,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -172,6 +233,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -226,6 +294,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -252,6 +321,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -275,6 +370,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -315,6 +417,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -341,6 +444,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -364,6 +493,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -410,16 +546,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -443,6 +577,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -458,6 +605,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -481,6 +656,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -511,15 +700,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -537,31 +717,23 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: x.insert1 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: CREATE DATABASE db2 PREHOOK: type: CREATEDATABASE diff --git a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out index 49c1269..5231349 100644 --- a/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert1_overwrite_partitions.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -58,7 +60,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -75,6 +76,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string), '11' (type: string) + outputColumnNames: one, two, ds, hr + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -92,6 +109,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 
(type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable PARTITION (ds='2011-11-11', hr='11') if not exists SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -175,6 +228,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -191,7 +246,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -208,6 +262,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: one, two + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -222,6 +291,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: default.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE destinTable SELECT one,two FROM sourceTable WHERE ds='2011-11-11' AND hr='11' order by one desc, two desc limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@sourcetable diff --git a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out index b5f28d2..15e6d76 100644 --- a/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out +++ b/ql/src/test/results/clientpositive/insert2_overwrite_partitions.q.out @@ -53,6 +53,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -69,7 +71,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -86,6 +87,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -102,6 +119,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY @@ -141,6 +194,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -157,7 +212,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: -- Statistics: Num rows: 30 Data size: 6028 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -174,6 +228,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: db2.destintable + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2011-11-11' (type: string) + outputColumnNames: one, two, ds + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(one, 16), compute_stats(two, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -190,6 +260,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: one, two + Column Types: string, string + Table: db2.destintable + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11') SELECT one,two FROM db1.sourceTable WHERE ds='2011-11-11' order by one desc, two desc limit 5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into1.q.out b/ql/src/test/results/clientpositive/insert_into1.q.out index da863a7..46c0512 100644 --- 
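For the partitioned destinations above (insert1_overwrite_partitions, insert2_overwrite_partitions), the hash-side Group By carries the partition columns as grouping keys and the follow-up job shuffles on them, so Column Stats Work receives one statistics row per partition rather than per table. The statement shape, taken from the db2.destintable hunks and assuming autogather is enabled:

SET hive.stats.column.autogather=true;

-- The static partition value becomes the Group By key (keys: ds) in the
-- generated plan; each distinct ds yields its own column-stats row.
EXPLAIN
INSERT OVERWRITE TABLE db2.destinTable PARTITION (ds='2011-11-11')
SELECT one, two FROM db1.sourceTable
WHERE ds='2011-11-11'
ORDER BY one DESC, two DESC
LIMIT 5;
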
a/ql/src/test/results/clientpositive/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -34,7 +36,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -56,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -70,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -126,6 +171,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -142,7 +189,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -164,6 +210,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 
1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -178,6 +239,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -234,6 +324,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -250,7 +342,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -272,6 +363,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -286,6 +392,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 
(type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -344,6 +479,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -368,6 +504,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -391,6 +553,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -441,6 +610,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -465,6 +635,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 
968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -488,6 +684,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into2.q.out b/ql/src/test/results/clientpositive/insert_into2.q.out index 46fab7b..0469bfe 100644 --- a/ql/src/test/results/clientpositive/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/insert_into2.q.out @@ -22,6 +22,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -38,7 +40,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -60,6 +61,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -76,6 +93,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -171,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -187,7 +242,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -209,6 +263,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -225,6 +295,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -289,6 +395,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -305,7 +413,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reduce Operator Tree: Select Operator @@ -327,6 +434,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -343,6 +466,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into3.q.out b/ql/src/test/results/clientpositive/insert_into3.q.out index ae7523b..e1e943d 100644 --- a/ql/src/test/results/clientpositive/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/insert_into3.q.out @@ -32,9 +32,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: 
Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -51,7 +55,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -82,6 +85,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -96,15 +114,50 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) @@ -125,6 +178,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 
100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -136,9 +204,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50 INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100 PREHOOK: type: QUERY @@ -191,9 +281,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -212,7 +306,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Select Operator expressions: key (type: string), value (type: string) @@ -247,6 +340,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,14 +369,49 @@ STAGE PLANS: Stage: Stage-3 
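For the multi-insert in insert_into3.q, each target table gets its own stats pipeline, which is why the plan grows two Column Stats Work stages (Stage-8 and Stage-9, one per target) and the pre-existing stage numbers shift. A sketch of the statement producing this plan, taken verbatim from the PREHOOK query above:

    EXPLAIN
    FROM src
    INSERT INTO TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50
    INSERT INTO TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100;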
Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -290,6 +433,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -301,9 +459,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE insert_into3a SELECT * LIMIT 10 INSERT INTO TABLE insert_into3b SELECT * LIMIT 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/insert_into4.q.out 
b/ql/src/test/results/clientpositive/insert_into4.q.out index bb4e557..c24ecdd 100644 --- a/ql/src/test/results/clientpositive/insert_into4.q.out +++ b/ql/src/test/results/clientpositive/insert_into4.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -70,6 +71,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -84,6 +100,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -115,6 +160,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -133,7 +180,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -155,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4a + Select Operator + expressions: _col0 (type: int), _col1 
(type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -169,6 +230,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into4a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into4a SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -202,6 +292,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -225,6 +316,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into4b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -248,6 +365,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: 
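The aggregations these stats stages run are ordinary UDAF calls, so the same computation can be tried by hand. A sketch, with the caveat that reading the literal 16 as the number of bit vectors for the NDV estimator is an interpretation of the compute_stats signature, not something this patch states:

    -- Produces one statistics struct per column: the structs shipped
    -- between the hash and mergepartial Group Bys in the hunks above.
    SELECT compute_stats(key, 16), compute_stats(value, 16)
    FROM src;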
+ Columns: key, value + Column Types: int, string + Table: default.insert_into4b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into5.q.out b/ql/src/test/results/clientpositive/insert_into5.q.out index 7b471f4..1eaebc7 100644 --- a/ql/src/test/results/clientpositive/insert_into5.q.out +++ b/ql/src/test/results/clientpositive/insert_into5.q.out @@ -30,6 +30,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,7 +50,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -66,6 +67,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 910 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -80,6 +96,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into5a SELECT 1, 'one' FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -113,6 +158,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data 
size: 50 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -159,6 +231,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5a + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -224,6 +303,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -247,6 +327,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -272,6 +386,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: 
default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -339,6 +460,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -362,6 +484,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into5b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 20 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 50 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -387,6 +543,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into5b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_into6.q.out b/ql/src/test/results/clientpositive/insert_into6.q.out index d93a167..ba062f1 100644 --- a/ql/src/test/results/clientpositive/insert_into6.q.out +++ b/ql/src/test/results/clientpositive/insert_into6.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -50,7 +52,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -72,6 +73,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -88,6 +105,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6a + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 150 Data size: 1500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 75 Data size: 750 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE insert_into6a PARTITION (ds='1') SELECT * FROM src LIMIT 150 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -137,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +214,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into6b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2680 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1340 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +273,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into6b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out index 24db44f..50d638b 100644 --- a/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out +++ b/ql/src/test/results/clientpositive/insert_values_orig_table_use_metadata.q.out @@ -305,6 +305,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 1 numRows 0 rawDataSize 0 @@ -427,6 +428,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -677,7 +679,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -869,6 +871,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: + COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 0 rawDataSize 0 @@ -920,7 +923,7 @@ Database: default Table: sp #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 502 rawDataSize 5318 diff --git a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out index c2732c8..db70465 100644 --- a/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out +++ b/ql/src/test/results/clientpositive/insertoverwrite_bucket.q.out @@ -100,6 +100,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ 
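The insert_values_orig_table_use_metadata.q.out changes above show the metastore side of the feature: COLUMN_STATS_ACCURATE grows a COLUMN_STATS map recording, per column, whether its stats are current. One way to inspect this for the sp table touched above (the partition spec is masked in the golden file, so the one below is a placeholder):

    -- Partition Parameters should list COLUMN_STATS_ACCURATE with
    -- {"COLUMN_STATS":{"key":"true","value":"true"}} after the insert.
    DESCRIBE FORMATTED sp PARTITION (<partition spec>);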
-130,6 +132,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: change, num + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -144,6 +161,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: change, num + Column Types: string, string + Table: default.temp1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: CREATE TABLE temp2 ( create_ts STRING , @@ -178,6 +224,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -209,6 +257,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.temp2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: create_ts, change, num + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(create_ts, 16), compute_stats(change, 16), compute_stats(num, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -223,6 +286,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: create_ts, change, num + Column Types: string, string, string + Table: default.temp2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: 
COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select * from bucketoutput1 a join bucketoutput2 b on (a.data=b.data) PREHOOK: type: QUERY PREHOOK: Input: default@bucketoutput1 diff --git a/ql/src/test/results/clientpositive/join14.q.out b/ql/src/test/results/clientpositive/join14.q.out index 66e42f1..03c3dba 100644 --- a/ql/src/test/results/clientpositive/join14.q.out +++ b/ql/src/test/results/clientpositive/join14.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join17.q.out 
b/ql/src/test/results/clientpositive/join17.q.out index 8bd67eb..6b3e4d7a 100644 --- a/ql/src/test/results/clientpositive/join17.q.out +++ b/ql/src/test/results/clientpositive/join17.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -162,6 +164,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -195,6 +224,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types struct,struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join25.q.out b/ql/src/test/results/clientpositive/join25.q.out index 5ad95c5..99b2bb1 100644 --- a/ql/src/test/results/clientpositive/join25.q.out +++ b/ql/src/test/results/clientpositive/join25.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 
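join17.q.out differs from its neighbors because the test evidently runs the extended explain (hence the GatherStats flags, Path -> Partition sections, and serialization properties in the hunks above), so the new stats stages appear with full plumbing: the hash-mode Group By writes LazyBinary structs to an intermediate SequenceFile, and Stage-3 merges them ahead of the Column Stats Work, whose "Is Table Level Stats: true" apparently reflects dest1 being unpartitioned. The statement, from the PREHOOK line below:

    EXPLAIN EXTENDED
    FROM src src1 JOIN src src2 ON (src1.key = src2.key)
    INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*;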
(type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join26.q.out b/ql/src/test/results/clientpositive/join26.q.out index 5bc8245..647a9a0 100644 --- a/ql/src/test/results/clientpositive/join26.q.out +++ b/ql/src/test/results/clientpositive/join26.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.key = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:y @@ -74,7 +76,7 @@ STAGE PLANS: 2 _col0 (type: string) Position of Big Table: 2 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -136,6 +138,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -316,6 +345,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), 
_col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join27.q.out b/ql/src/test/results/clientpositive/join27.q.out index 8b43f3f..0457943 100644 --- a/ql/src/test/results/clientpositive/join27.q.out +++ b/ql/src/test/results/clientpositive/join27.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 
(type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/join28.q.out b/ql/src/test/results/clientpositive/join28.q.out index 309bdcd..58ffed0 100644 --- a/ql/src/test/results/clientpositive/join28.q.out +++ b/ql/src/test/results/clientpositive/join28.q.out @@ -23,13 +23,15 @@ FROM JOIN srcpart z ON (subq.key1 = z.key and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-7 is a root stage - Stage-5 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-5 + Stage-8 is a root stage + Stage-6 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:z @@ -70,7 +72,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -115,6 +117,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -131,6 +148,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git a/ql/src/test/results/clientpositive/join29.q.out b/ql/src/test/results/clientpositive/join29.q.out index b53143d..7b69c4f 100644 --- a/ql/src/test/results/clientpositive/join29.q.out +++ b/ql/src/test/results/clientpositive/join29.q.out @@ -20,15 +20,17 @@ FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 , consists of Stage-8, Stage-9, Stage-2 - Stage-8 has a backup stage: Stage-2 - Stage-5 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-2, Stage-5, Stage-6 - Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-1, Stage-5 , consists of Stage-9, Stage-10, Stage-2 Stage-9 has a backup stage: Stage-2 Stage-6 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2, Stage-6, Stage-7 + Stage-10 has a backup stage: Stage-2 + Stage-7 depends on stages: Stage-10 Stage-2 - Stage-4 is a root stage + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -70,10 +72,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Conditional Operator - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME1 @@ -87,7 +89,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -111,6 +113,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -127,7 +144,36 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator - Stage: Stage-9 + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-10 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -141,7 +187,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -165,6 +211,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -206,8 +267,23 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/join3.q.out b/ql/src/test/results/clientpositive/join3.q.out index fb378f4..f3d198d 100644 --- a/ql/src/test/results/clientpositive/join3.q.out +++ b/ql/src/test/results/clientpositive/join3.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -92,6 +94,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -106,6 +123,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join30.q.out b/ql/src/test/results/clientpositive/join30.q.out index 3bd6db1..b82e287 100644 --- a/ql/src/test/results/clientpositive/join30.q.out +++ b/ql/src/test/results/clientpositive/join30.q.out @@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-2 depends on stages: Stage-6 + Stage-7 is a root stage + Stage-2 depends on stages: Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-2 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 
$hdt$_0:$hdt$_0:x @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join31.q.out b/ql/src/test/results/clientpositive/join31.q.out index 32eab4f..a36baa1 100644 --- a/ql/src/test/results/clientpositive/join31.q.out +++ b/ql/src/test/results/clientpositive/join31.q.out @@ -22,10 +22,12 @@ group by subq1.key POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1 - Stage-3 depends on stages: Stage-7 + Stage-8 depends on stages: Stage-1 + Stage-3 depends on stages: Stage-8 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-4, Stage-5 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -60,7 +62,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:$hdt$_1:y @@ -133,6 +135,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -147,6 +164,35 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/join32.q.out b/ql/src/test/results/clientpositive/join32.q.out index 0358968..418ae10 100644 --- a/ql/src/test/results/clientpositive/join32.q.out +++ b/ql/src/test/results/clientpositive/join32.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join33.q.out b/ql/src/test/results/clientpositive/join33.q.out index 0358968..418ae10 100644 --- a/ql/src/test/results/clientpositive/join33.q.out +++ b/ql/src/test/results/clientpositive/join33.q.out @@ -19,13 +19,15 @@ FROM src1 x JOIN src y ON (x.key = y.key) JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-8 is a 
root stage - Stage-6 depends on stages: Stage-8 - Stage-0 depends on stages: Stage-6 + Stage-9 is a root stage + Stage-7 depends on stages: Stage-9 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 STAGE PLANS: - Stage: Stage-8 + Stage: Stage-9 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:y @@ -72,7 +74,7 @@ STAGE PLANS: 1 _col1 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -141,6 +143,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -321,6 +350,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join34.q.out b/ql/src/test/results/clientpositive/join34.q.out index 117afac..f1d7c40 100644 --- a/ql/src/test/results/clientpositive/join34.q.out +++ b/ql/src/test/results/clientpositive/join34.q.out @@ -27,13 +27,15 @@ FROM JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-6 is a root stage - Stage-5 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-5 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -59,7 +61,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-5 + Stage: Stage-6 Map Reduce Map Operator Tree: TableScan @@ -121,6 +123,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -180,6 +209,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -312,6 +368,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git a/ql/src/test/results/clientpositive/join35.q.out b/ql/src/test/results/clientpositive/join35.q.out index 1f90df9..f75a09e 100644 --- a/ql/src/test/results/clientpositive/join35.q.out +++ b/ql/src/test/results/clientpositive/join35.q.out @@ -28,11 +28,13 @@ JOIN src1 x ON (x.key = subq1.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-7 depends on stages: Stage-1, Stage-4 - Stage-6 depends on stages: Stage-7 - Stage-0 depends on stages: Stage-6 + Stage-8 depends on stages: Stage-1, Stage-5 + Stage-7 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-7 Stage-3 depends on stages: Stage-0 - Stage-4 is a root stage + Stage-9 depends on stages: Stage-3, Stage-4 + Stage-4 depends on stages: Stage-7 + Stage-5 is a root stage STAGE PLANS: Stage: Stage-1 @@ -143,7 +145,7 @@ STAGE PLANS: GatherStats: false MultiFileSpray: false - Stage: Stage-7 + Stage: Stage-8 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:x @@ -169,7 +171,7 @@ STAGE PLANS: 1 _col0 (type: string) Position of Big Table: 0 - Stage: Stage-6 + Stage: Stage-7 Map Reduce Map Operator Tree: TableScan @@ -221,6 +223,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false TableScan GatherStats: false Union @@ -270,6 +299,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib 
org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,7 +355,7 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe #### A masked pattern was here #### Partition - base file name: -mr-10003 + base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: @@ -400,10 +456,87 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10003 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types struct,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan alias: x1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE GatherStats: false diff --git 
a/ql/src/test/results/clientpositive/join36.q.out b/ql/src/test/results/clientpositive/join36.q.out index 43a091f..9971a9a 100644 --- a/ql/src/test/results/clientpositive/join36.q.out +++ b/ql/src/test/results/clientpositive/join36.q.out @@ -57,13 +57,15 @@ SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -86,7 +88,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -119,6 +121,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -135,6 +152,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join37.q.out b/ql/src/test/results/clientpositive/join37.q.out index b0a2ee3..eb50db5 100644 --- a/ql/src/test/results/clientpositive/join37.q.out +++ b/ql/src/test/results/clientpositive/join37.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is 
a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_0:x @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -79,6 +81,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -95,6 +112,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join39.q.out b/ql/src/test/results/clientpositive/join39.q.out index 628a44a..8dbd2ba 100644 --- a/ql/src/test/results/clientpositive/join39.q.out +++ b/ql/src/test/results/clientpositive/join39.q.out @@ -17,13 +17,15 @@ SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-5 is a root stage - Stage-4 depends on stages: Stage-5 - Stage-0 depends on stages: Stage-4 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: - Stage: 
Stage-5 + Stage: Stage-6 Map Reduce Local Work Alias -> Map Local Tables: $hdt$_1:src @@ -46,7 +48,7 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -72,6 +74,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(key1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -88,6 +105,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/join4.q.out b/ql/src/test/results/clientpositive/join4.q.out index d9510f5..83f8f75 100644 --- a/ql/src/test/results/clientpositive/join4.q.out +++ b/ql/src/test/results/clientpositive/join4.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join43.q.out b/ql/src/test/results/clientpositive/join43.q.out index e8c7278..770bdb7 100644 --- a/ql/src/test/results/clientpositive/join43.q.out +++ b/ql/src/test/results/clientpositive/join43.q.out @@ -209,46 +209,46 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: purchase_history - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) TableScan - alias: cart_history - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: purchase_history + Statistics: 
Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator @@ -257,14 +257,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col3) (type: boolean) + predicate: (_col3 > _col1) (type: boolean) Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col3) - keys: _col0 (type: string), _col1 (type: int) + aggregations: max(_col1) + keys: _col2 (type: string), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -275,7 +275,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -306,16 +306,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -330,22 +324,32 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col1 (type: int) - outputColumnNames: _col0 + 0 _col0 (type: string), _col1 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col2 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col2 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE - 
table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -427,46 +431,46 @@ from ( ) list POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 + Stage-2 is a root stage Stage-3 depends on stages: Stage-2 - Stage-0 depends on stages: Stage-3 + Stage-1 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-1 STAGE PLANS: - Stage: Stage-1 + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - alias: purchase_history - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + alias: cart_history + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) TableScan - alias: cart_history - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + alias: purchase_history + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: s is not null (type: boolean) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: s (type: string), time (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6 Data size: 36 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 57 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator @@ -475,14 +479,14 @@ STAGE PLANS: keys: 0 _col0 (type: string) 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col1, _col2, _col3 Statistics: Num rows: 6 Data size: 39 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (_col1 > _col3) (type: boolean) + predicate: (_col3 > _col1) (type: boolean) Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE Group By Operator - aggregations: max(_col3) - keys: _col0 (type: string), _col1 
(type: int) + aggregations: max(_col1) + keys: _col2 (type: string), _col3 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2 Data size: 13 Basic stats: COMPLETE Column stats: NONE @@ -493,7 +497,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 + Stage: Stage-3 Map Reduce Map Operator Tree: TableScan @@ -520,17 +524,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-3 + Stage: Stage-1 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col2 (type: int) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col2 (type: int) - Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) - TableScan alias: events Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -546,17 +543,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col3 (type: int) Statistics: Num rows: 6 Data size: 79 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int) + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col2 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col2 (type: int) + Statistics: Num rows: 1 Data size: 6 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col2 (type: int) - 1 _col0 (type: string), _col3 (type: int) - outputColumnNames: _col0, _col1, _col2, _col4, _col5 + 0 _col0 (type: string), _col3 (type: int) + 1 _col0 (type: string), _col2 (type: int) + outputColumnNames: _col1, _col2, _col4, _col5, _col6 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int), _col4 (type: string), _col5 (type: int) + expressions: _col4 (type: string), _col5 (type: int), _col6 (type: int), _col1 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 6 Data size: 86 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/join5.q.out b/ql/src/test/results/clientpositive/join5.q.out index bf1cde3..4469da6 100644 --- a/ql/src/test/results/clientpositive/join5.q.out +++ b/ql/src/test/results/clientpositive/join5.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join6.q.out b/ql/src/test/results/clientpositive/join6.q.out index 77c8c3a..19b12e8 100644 --- a/ql/src/test/results/clientpositive/join6.q.out +++ b/ql/src/test/results/clientpositive/join6.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -98,6 +100,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -112,6 +129,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), 
_col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join7.q.out b/ql/src/test/results/clientpositive/join7.q.out index f0f437c..31a6032 100644 --- a/ql/src/test/results/clientpositive/join7.q.out +++ b/ql/src/test/results/clientpositive/join7.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -126,6 +128,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -140,6 +157,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat 
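-- [Editor's note, hedged] The Select Operator -> Group By "compute_stats(..., 16)" chain
-- spliced in after each FileSink, the follow-up MapReduce stage that re-aggregates the
-- partial stats in "mergepartial" mode, and the terminal "Column Stats Work" stage repeat
-- across join5.q.out through join8.q.out: this is the plan shape Hive emits when column
-- statistics are gathered automatically as part of an INSERT. A minimal sketch of how such
-- a plan could be reproduced and its effect inspected, assuming the autogather flag
-- (hive.stats.column.autogather) that this patch appears to exercise; dest1/c1 names are
-- taken from the surrounding output, and the query is illustrative, not the test's own:
SET hive.stats.column.autogather=true;
EXPLAIN
INSERT OVERWRITE TABLE dest1
SELECT CAST(src1.key AS INT), src1.value, CAST(src2.key AS INT), src2.value
FROM src src1 JOIN src src2 ON (src1.key = src2.key);
-- After the INSERT itself runs, the statistics persisted by the Column Stats Work stage
-- can be read back per column:
DESCRIBE FORMATTED dest1 c1;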
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join8.q.out b/ql/src/test/results/clientpositive/join8.q.out index c1035b4..7640597 100644 --- a/ql/src/test/results/clientpositive/join8.q.out +++ b/ql/src/test/results/clientpositive/join8.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -101,6 +103,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -115,6 +132,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/join9.q.out b/ql/src/test/results/clientpositive/join9.q.out index befc47a..ea9380f 100644 --- a/ql/src/test/results/clientpositive/join9.q.out +++ b/ql/src/test/results/clientpositive/join9.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -210,6 +212,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -243,6 +272,83 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT 
src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/join_map_ppr.q.out b/ql/src/test/results/clientpositive/join_map_ppr.q.out index dba176d..46986ee 100644 --- a/ql/src/test/results/clientpositive/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/join_map_ppr.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -131,6 +132,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -186,6 +203,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -228,6 +274,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -605,6 +659,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -688,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -709,6 +764,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -764,6 +835,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-9 Conditional Operator @@ -783,7 +883,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -806,6 +906,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -820,7 +928,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -850,7 +958,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -871,7 +979,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -907,7 +1015,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -937,7 +1045,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -958,7 +1066,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 diff --git a/ql/src/test/results/clientpositive/lb_fs_stats.q.out b/ql/src/test/results/clientpositive/lb_fs_stats.q.out index b07192b..9bc96e4 100644 --- a/ql/src/test/results/clientpositive/lb_fs_stats.q.out +++ b/ql/src/test/results/clientpositive/lb_fs_stats.q.out @@ -46,7 +46,7 @@ Database: default Table: test_tab #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out index 35e6827..7e25f6f 100644 --- a/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out +++ b/ql/src/test/results/clientpositive/limit_pushdown_negative.q.out @@ -237,9 +237,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -279,6 +283,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 250 Data size: 2656 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -308,14 +327,49 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.3 value expressions: _col0 (type: string), _col1 (type: double) Reduce Operator Tree: Select Operator @@ -337,6 +391,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 20 Data size: 200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -348,6 +417,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 
Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/lineage1.q.out b/ql/src/test/results/clientpositive/lineage1.q.out index cc905d6..011828d 100644 --- a/ql/src/test/results/clientpositive/lineage1.q.out +++ b/ql/src/test/results/clientpositive/lineage1.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -111,6 +112,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE @@ -126,6 +140,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_l1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -149,6 +189,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_l1 + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out index f70f8b2..ff0ad8b 100644 --- 
a/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A 
masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -357,7 +419,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out index 6920dcc..e3b6f36 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_11.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types 
struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -195,7 +257,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -238,7 +300,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out index 55acbb7..990ba9f 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_12.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: 
string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '11') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 @@ -338,7 +400,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out index 3a1d2a4..8455467 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_13.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), '2008-04-08' (type: string), '2013-01-23+18:00:99' (type: string) + outputColumnNames: col1, col2, col3, col4, col5, ds, hr + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(col1, 16), compute_stats(col2, 16), compute_stats(col3, 16), compute_stats(col4, 16), compute_stats(col5, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -120,6 +140,40 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6 + columns.types struct:struct:struct:struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -153,6 +207,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: col1, col2, col3, col4, col5 + Column Types: string, string, string, string, string + Table: default.list_bucketing_mul_col + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_mul_col partition (ds = '2008-04-08', hr = '2013-01-23+18:00:99') select 1, key, 1, value, 1 from src PREHOOK: type: QUERY @@ -201,7 +263,7 @@ Database: default Table: list_bucketing_mul_col #### A masked pattern was here #### 
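-- [Editor's note, hedged] In the list_bucket_dml_* plans above, the compute_stats
-- aggregation is keyed by the partition columns (ds, hr) and the Column Stats Work stage
-- carries "Is Table Level Stats: false": statistics are computed and stored per partition,
-- which is why COLUMN_STATS_ACCURATE in the partition parameters gains a COLUMN_STATS map
-- alongside BASIC_STATS. A sketch of how one such partition's column stats could be
-- inspected (table, column, and partition spec copied from the nearby list_bucket_dml_12
-- output; not part of the original test):
DESCRIBE FORMATTED list_bucketing_mul_col PARTITION (ds='2008-04-08', hr='11') col1;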
Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\",\"col4\":\"true\",\"col5\":\"true\"}} numFiles 4 numRows 500 rawDataSize 6312 @@ -246,7 +308,7 @@ STAGE PLANS: ds 2008-04-08 hr 2013-01-23+18:00:99 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"col1":"true","col2":"true","col3":"true","col4":"true","col5":"true"}} bucket_count -1 column.name.delimiter , columns col1,col2,col3,col4,col5 diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out index ab30b7d..13df7c9 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_14.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -116,6 +133,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -149,6 +195,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing + Is Table Level Stats: true + PREHOOK: query: insert overwrite table list_bucketing select * from src PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -177,7 +231,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: 
MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -283,7 +337,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -304,7 +358,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out index 01bc19c..7b72c66 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_2.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -311,7 +373,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out index 52646a2..de4e086 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_3.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -161,6 +181,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -194,6 +248,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds='2008-04-08', hr='11') select key, value from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -231,7 +293,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 10624 @@ -303,7 +365,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out index 84ada2b..9e8560a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_4.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce 
partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out index 71d7e16..3e55d2a 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -65,6 +66,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -167,6 +187,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -200,6 +254,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart @@ -240,7 +302,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -285,7 +347,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -362,7 +424,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +470,7 @@ STAGE PLANS: ds 2008-04-08 hr 12 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out index 8e20ff1..5217a3f 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_6.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + 
serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here 
#### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out index 58bf240..286a045 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_7.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), 
_col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 984 rawDataSize 9488 @@ -333,6 +395,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -379,6 +442,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE 
Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -481,6 +563,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -523,6 +639,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -681,7 +805,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 16 rawDataSize 136 @@ -724,7 +848,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 984 rawDataSize 9488 @@ -785,7 +909,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 
column.name.delimiter , columns key,value @@ -831,7 +955,7 @@ STAGE PLANS: ds 2008-04-08 hr b1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index b38d332..071f0de 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: 
Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_dynamic_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_dynamic_part partition (ds = '2008-04-08', hr) select key, value, if(key % 100 == 0, 'a1', 'b1') from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -254,7 +316,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 16 rawDataSize 136 @@ -297,7 +359,7 @@ Database: default Table: list_bucketing_dynamic_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 984 rawDataSize 9488 @@ -414,7 +476,7 @@ STAGE PLANS: ds 2008-04-08 hr a1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out index 624e3ac..221b3c9 100644 --- a/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_dml_9.q.out @@ -26,6 +26,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -69,6 +70,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -171,6 +191,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -204,6 +258,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from srcpart where ds = '2008-04-08' PREHOOK: type: QUERY @@ -250,7 +312,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 6 numRows 1000 rawDataSize 9624 @@ -286,6 +348,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -331,6 +394,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -433,6 +515,40 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [srcpart] /srcpart/ds=2008-04-08/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), 
_col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -474,6 +590,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + Stage: Stage-3 Merge File Operator Map Operator Tree: @@ -624,7 +748,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 1000 rawDataSize 9624 @@ -685,7 +809,7 @@ STAGE PLANS: ds 2008-04-08 hr 11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out index 17c7afe..98b9b13 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_1.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -266,7 +266,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -349,7 +349,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} 
bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out index 5a326f6..4cafefa 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_2.q.out @@ -47,7 +47,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -97,7 +97,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -181,7 +181,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -268,7 +268,7 @@ STAGE PLANS: ds 1 hr 4 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out index 3ff221d..9931afd 100644 --- a/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out +++ b/ql/src/test/results/clientpositive/list_bucket_query_multiskew_3.q.out @@ -43,7 +43,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -103,7 +103,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 500 rawDataSize 5312 @@ -168,7 +168,7 @@ Database: default Table: fact_daily #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -211,7 +211,7 @@ STAGE PLANS: ds 1 hr 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -322,7 +322,7 @@ STAGE PLANS: ds 1 hr 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -408,7 +408,7 @@ STAGE PLANS: ds 1 hr 3 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git 
a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out index d05bf64..a9eac5af 100644 --- a/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out +++ b/ql/src/test/results/clientpositive/llap/acid_bucket_pruning.q.out @@ -43,22 +43,22 @@ STAGE PLANS: alias: acidtbldefault filterExpr: (a = 1) (type: boolean) buckets included: [1,] of 16 - Statistics: Num rows: 7972 Data size: 31889 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7972 Data size: 31848 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (a = 1) (type: boolean) - Statistics: Num rows: 3986 Data size: 15944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3986 Data size: 15944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 3986 Data size: 15944 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -86,6 +86,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"a":"true"}} bucket_count 16 bucket_field_name a column.name.delimiter , @@ -109,6 +110,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: + COLUMN_STATS_ACCURATE {"COLUMN_STATS":{"a":"true"}} bucket_count 16 bucket_field_name a column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out index 0d5ba01..f0cf74c 100644 --- a/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/llap/alter_merge_stats_orc.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -241,7 +241,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -290,7 +290,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git a/ql/src/test/results/clientpositive/llap/auto_join1.q.out b/ql/src/test/results/clientpositive/llap/auto_join1.q.out index 6a0a1d5..e85e4f1 100644 --- 
a/ql/src/test/results/clientpositive/llap/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_join1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,8 +83,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out index 841ef14..d05cc8c 100644 --- a/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_smb_mapjoin_14.q.out @@ -64,40 +64,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 
Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -107,10 +107,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -174,25 +174,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -200,17 +200,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reducer 2 Execution mode: llap @@ -219,17 +219,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -238,10 +238,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -330,25 +330,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -356,43 +356,43 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -400,18 +400,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 @@ -422,12 +422,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: 
COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -439,14 +439,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -459,12 +459,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -547,40 +547,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -590,10 +590,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -664,40 +664,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -707,10 +707,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -805,40 +805,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -848,10 +848,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,40 +934,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -977,10 +977,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,38 +1042,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1085,15 +1085,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1102,10 +1102,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1162,40 +1162,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1205,10 +1205,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1271,36 +1271,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join 
Operator condition map: Inner Join 0 to 1 @@ -1309,15 +1309,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1327,10 +1327,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1409,40 +1409,40 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 
(type: bigint) Execution mode: llap Reducer 2 @@ -1452,10 +1452,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1533,6 +1533,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1540,30 +1542,34 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1571,32 +1577,88 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator 
compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1614,6 +1676,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + 
Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1747,6 +1823,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1755,32 +1833,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1788,53 +1868,109 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: 
Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-3
 Dependency Collection
@@ -1852,6 +1988,20 @@ STAGE PLANS:
 Stage: Stage-4
 Stats-Aggr Operator
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: int, int
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
index ab94c37..c1459d5 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_10.q.out
@@ -80,14 +80,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -96,15 +96,15 @@
 1 _col0 (type: int)
 input vertices:
 1 Map 5
- Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -112,14 +112,14 @@
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -128,15 +128,15 @@
 1 _col0 (type: int)
 input vertices:
 1 Map 5
- Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -144,24 +144,24 @@
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 3
@@ -171,10 +171,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -248,34 +248,34 @@
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: key (type: int)
 mode: final
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 2
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 6) (type: boolean)
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -284,15 +284,15 @@
 1 _col0 (type: int)
 input vertices:
 0 Map 1
- Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -303,10 +303,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
index 9c37848..0bb5b0f 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_13.q.out
@@ -71,6 +71,8 @@ STAGE DEPENDENCIES:
 Stage-3 depends on stages: Stage-2
 Stage-0 depends on stages: Stage-3
 Stage-4 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-4, Stage-5
+ Stage-7 depends on stages: Stage-4, Stage-5
 Stage-1 depends on stages: Stage-3
 Stage-5 depends on stages: Stage-1
@@ -79,21 +81,23 @@ STAGE PLANS:
 Tez
 #### A masked pattern was here ####
 Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Map 1 <- Map 4 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Inner Join 0 to 1
@@ -102,54 +106,110 @@
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
 input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ 1 Map 4
+ Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Select Operator
 expressions: _col1 (type: string), _col3 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Execution mode: llap
 LLAP IO: no inputs
- Map 2
+ Map 4
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string)
 Execution mode: llap
 LLAP IO: no inputs
+ Reducer 2
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-3
 Dependency Collection
@@ -167,6 +227,20 @@ STAGE PLANS:
 Stage: Stage-4
 Stats-Aggr Operator
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -287,6 +361,8 @@ STAGE DEPENDENCIES:
 Stage-3 depends on stages: Stage-2
 Stage-0 depends on stages: Stage-3
 Stage-4 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-4, Stage-5
+ Stage-7 depends on stages: Stage-4, Stage-5
 Stage-1 depends on stages: Stage-3
 Stage-5 depends on stages: Stage-1
@@ -295,77 +371,122 @@ STAGE PLANS:
 Tez
 #### A masked pattern was here ####
 Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
 condition map:
 Inner Join 0 to 1
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Select Operator
 expressions: _col1 (type: string), _col3 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Execution mode: llap
- LLAP IO: no inputs
- Map 2
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Reducer 2
 Execution mode: llap
- LLAP IO: no inputs
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-3
 Dependency Collection
@@ -383,6 +504,20 @@ STAGE PLANS:
 Stage: Stage-4
 Stats-Aggr Operator
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -503,6 +638,8 @@ STAGE DEPENDENCIES:
 Stage-3 depends on stages: Stage-2
 Stage-0 depends on stages: Stage-3
 Stage-4 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-4, Stage-5
+ Stage-7 depends on stages: Stage-4, Stage-5
 Stage-1 depends on stages: Stage-3
 Stage-5 depends on stages: Stage-1
@@ -511,77 +648,122 @@ STAGE PLANS:
 Tez
 #### A masked pattern was here ####
 Edges:
- Map 1 <- Map 2 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Map Operator Tree:
+ TableScan
 alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
+ Merge Join Operator
 condition map:
 Inner Join 0 to 1
 keys:
 0 _col0 (type: int)
 1 _col0 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- input vertices:
- 1 Map 2
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2976 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: int), _col2 (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Select Operator
 expressions: _col1 (type: string), _col3 (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Statistics: Num rows: 16 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Execution mode: llap
- LLAP IO: no inputs
- Map 2
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: string)
+ Reducer 2
 Execution mode: llap
- LLAP IO: no inputs
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-3
 Dependency Collection
@@ -599,6 +781,20 @@ STAGE PLANS:
 Stage: Stage-4
 Stats-Aggr Operator
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
index c618531..337d5ae 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_14.q.out
@@ -57,11 +57,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join0 to 1
@@ -70,15 +70,15 @@
 1 _col0 (type: int)
 input vertices:
 1 Map 3
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -86,16 +86,16 @@
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -105,10 +105,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -164,27 +164,27 @@
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 2
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 189 Data size: 1891 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 189 Data size: 756 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join0 to 1
@@ -193,15 +193,15 @@
 1 _col0 (type: int)
 input vertices:
 0 Map 1
- Statistics: Num rows: 207 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 189 Data size: 1512 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -212,10 +212,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
index 286939b..db0af9d 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_15.q.out
@@ -57,11 +57,11 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Left Outer Join0 to 1
@@ -70,15 +70,15 @@
 1 _col0 (type: int)
 input vertices:
 1 Map 3
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -86,16 +86,16 @@
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -105,10 +105,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -143,27 +143,27 @@
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Map 2
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
 Right Outer Join0 to 1
@@ -172,15 +172,15 @@
 1 _col0 (type: int)
 input vertices:
 0 Map 1
- Statistics: Num rows: 22 Data size: 176 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 LLAP IO: no inputs
@@ -191,10 +191,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
index 9e60536..34bea41 100644
--- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_6.q.out
@@ -84,46 +84,30 @@ STAGE PLANS:
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
 Execution mode: llap
+ LLAP IO: no inputs
 Map 5
 Map Operator Tree:
 TableScan
@@ -143,6 +127,25 @@
 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
 Reducer 2
 Execution mode: llap
 Reduce Operator Tree:
@@ -152,27 +155,43 @@
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
 Execution mode: llap
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -211,46 +230,30 @@
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
 Execution mode: llap
+ LLAP IO: no inputs
 Map 5
 Map Operator Tree:
 TableScan
@@ -270,6 +273,25 @@
 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
 Reducer 2
 Execution mode: llap
 Reduce Operator Tree:
@@ -279,27 +301,43 @@
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
 Execution mode: llap
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -338,46 +376,30 @@
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
 Execution mode: llap
+ LLAP IO: no inputs
 Map 5
 Map Operator Tree:
 TableScan
@@ -397,6 +419,25 @@
 Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
 Reducer 2
 Execution mode: llap
 Reduce Operator Tree:
@@ -406,27 +447,43 @@
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
 Execution mode: llap
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -465,46 +522,30 @@
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: UDFToDouble(_col0) (type: double)
- sort order: +
- Map-reduce partition columns: UDFToDouble(_col0) (type: double)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
 Execution mode: llap
+ LLAP IO: no inputs
 Map 5
 Map Operator Tree:
 TableScan
@@ -524,6 +565,25 @@
 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
 Reducer 2
 Execution mode: llap
 Reduce Operator Tree:
@@ -533,27 +593,43 @@
 keys:
 0 UDFToDouble(_col0) (type: double)
 1 UDFToDouble(_col0) (type: double)
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 3109 Data size: 24872 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
 Execution mode: llap
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -598,36 +674,36 @@
 Map Operator Tree:
 TableScan
 alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Map Operator Tree:
 TableScan
 alias: c
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Map Operator Tree:
 TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Merge Join Operator
 condition map:
 Inner Join 0 to 1
@@ -636,15 +712,15 @@
 0 _col0 (type: int)
 1 _col0 (type: int)
 2 _col0 (type: int)
- Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3253 Data size: 26024 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: llap
 Reducer 2
@@ -654,10 +730,10 @@
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -696,63 +772,66 @@
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int)
 Execution mode: llap
+ LLAP IO: no inputs
 Map 5
 Map Operator Tree:
 TableScan
 alias: c
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: value is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: value (type: string)
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: llap
+ LLAP IO: no inputs
+ Map 6
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: llap
 LLAP IO: no inputs
 Reducer 2
@@ -764,27 +843,43 @@
 keys:
 0 _col1 (type: string)
 1 _col0 (type: string)
- Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ outputColumnNames: _col0
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count()
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
- Reducer 3
+ Reducer 4
 Execution mode: llap
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -823,46 +918,30 @@
 #### A masked pattern was here ####
 Edges:
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
- Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
 Vertices:
 Map 1
 Map Operator Tree:
 TableScan
- alias: b
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map Operator Tree:
- TableScan
 alias: a
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: string)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Merge Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col1
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+
Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -882,6 +961,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -891,27 +989,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -950,46 +1064,30 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 
(CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan @@ -1009,6 +1107,25 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1018,27 +1135,43 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1219 Data size: 4876 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce 
Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 3109 Data size: 24872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1083,36 +1216,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1121,15 +1254,15 @@ STAGE 
PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3253 Data size: 26024 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1139,10 +1272,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1181,63 +1314,66 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map Operator Tree: - TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Merge Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column 
stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap + LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1249,27 +1385,43 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1168 Data size: 4672 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out index b69d0bd..7043921 100644 --- a/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/llap/auto_sortmerge_join_9.q.out @@ -65,14 +65,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -81,15 +81,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -97,19 +97,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -119,10 +119,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -180,14 +180,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -197,18 +197,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -216,19 +216,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -239,10 +239,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -316,14 +316,14 @@ STAGE 
PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -333,36 +333,36 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -372,17 +372,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -391,10 +391,10 @@ 
STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,14 +484,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -501,18 +501,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -520,33 +520,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) 
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -556,18 +556,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -575,19 +575,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -598,7 +598,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -608,14 +608,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Reducer 5 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE 
Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -628,12 +628,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -717,14 +717,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -733,15 +733,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -749,19 +749,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -771,10 +771,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -846,14 +846,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -862,15 +862,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -878,19 +878,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -900,10 +900,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -999,14 +999,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1015,15 +1015,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1031,19 +1031,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1053,10 +1053,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1140,14 +1140,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - 
Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1156,15 +1156,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1172,19 +1172,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1194,10 +1194,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1259,14 +1259,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic 
stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1275,15 +1275,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1291,19 +1291,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1313,10 +1313,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1374,14 +1374,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1390,15 +1390,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1406,19 +1406,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1428,10 +1428,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1487,14 +1487,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1503,15 +1503,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1519,19 +1519,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1541,10 +1541,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1610,14 +1610,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1629,15 +1629,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1645,38 +1645,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key 
< 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1686,10 +1686,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1769,14 +1769,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1785,15 +1785,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: 
Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1801,19 +1801,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1823,10 +1823,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1896,14 +1896,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1912,15 +1912,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ 
-1928,19 +1928,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1950,10 +1950,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2011,14 +2011,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2028,18 +2028,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2047,19 +2047,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: 
COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2070,10 +2070,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2147,14 +2147,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2164,36 +2164,36 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num 
rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2203,17 +2203,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -2222,10 +2222,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2315,14 +2315,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2332,18 +2332,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2351,33 +2351,33 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2387,18 +2387,18 @@ STAGE PLANS: outputColumnNames: _col0 input vertices: 1 Map 6 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2406,19 +2406,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2429,7 +2429,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2439,14 +2439,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Reducer 5 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2459,12 +2459,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -2548,14 +2548,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2564,15 +2564,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort 
order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2580,19 +2580,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2602,10 +2602,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2677,14 +2677,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2693,15 +2693,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2709,19 +2709,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2731,10 +2731,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2830,14 +2830,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2846,15 +2846,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2862,19 +2862,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -2884,10 +2884,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2971,14 +2971,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2987,15 +2987,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3003,19 +3003,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 
Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3025,10 +3025,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3086,14 +3086,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3102,15 +3102,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3118,19 +3118,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3140,10 +3140,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3199,14 +3199,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3215,15 +3215,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3231,19 +3231,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3253,10 +3253,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3322,14 +3322,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3341,15 +3341,15 @@ STAGE PLANS: input vertices: 1 Map 3 2 Map 4 - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3357,38 +3357,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: 
llap LLAP IO: no inputs Reducer 2 @@ -3398,10 +3398,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3481,14 +3481,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -3497,15 +3497,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -3513,19 +3513,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3535,10 +3535,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucket2.q.out b/ql/src/test/results/clientpositive/llap/bucket2.q.out index be886b0..dbcec13 100644 --- a/ql/src/test/results/clientpositive/llap/bucket2.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -140,6 +141,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +213,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket3.q.out b/ql/src/test/results/clientpositive/llap/bucket3.q.out index 8e6d85c..3bc765e 100644 --- a/ql/src/test/results/clientpositive/llap/bucket3.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket3.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -138,6 +140,61 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, 
ds + Statistics: Num rows: 500 Data size: 90000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -174,6 +231,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket4.q.out b/ql/src/test/results/clientpositive/llap/bucket4.q.out index b4f7ca1..ce5733c 100644 --- a/ql/src/test/results/clientpositive/llap/bucket4.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket4.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -142,6 +143,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -180,6 +216,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket4_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket5.q.out b/ql/src/test/results/clientpositive/llap/bucket5.q.out index ef5067b..f15e3ff 100644 --- a/ql/src/test/results/clientpositive/llap/bucket5.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket5.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4, Stage-5 + Stage-12 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 @@ -43,7 +45,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -173,10 +177,57 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Execution mode: llap Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked 
pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -213,6 +264,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -251,6 +349,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -514,7 +628,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git a/ql/src/test/results/clientpositive/llap/bucket6.q.out 
b/ql/src/test/results/clientpositive/llap/bucket6.q.out index 20895f8..fe2d3a6 100644 --- a/ql/src/test/results/clientpositive/llap/bucket6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket6.q.out @@ -17,6 +17,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -24,6 +25,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,6 +60,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 356000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +105,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_bucket + PREHOOK: query: insert into table src_bucket select key,value from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out index 4ac5ac9..7ae05e3 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_groupby.q.out @@ -63,22 +63,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -91,11 +91,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -104,13 +104,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,22 +191,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -219,11 +219,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE 
Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -232,13 +232,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,22 +292,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: length(key) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -320,13 +320,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -373,22 +373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: abs(length(key)) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) 
keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -401,13 +401,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -455,22 +455,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -483,11 +483,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -496,13 +496,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 
Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -557,22 +557,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -585,11 +585,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -598,13 +598,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,22 +658,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select 
Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -685,10 +685,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1043,22 +1043,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1070,10 +1070,10 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1158,23 +1158,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) bucketGroup: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1187,11 +1187,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1200,13 +1200,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1261,22 +1261,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1289,11 +1289,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1302,13 +1302,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 21186 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1464,23 +1464,23 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) bucketGroup: true keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Execution mode: llap @@ -1493,11 +1493,11 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1506,13 +1506,13 @@ STAGE PLANS: 
Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1567,22 +1567,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: clustergroupby - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string), key (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col1 (type: string), _col0 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) Execution mode: llap @@ -1595,15 +1595,15 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) Reducer 3 @@ -1612,13 +1612,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 
Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/bucket_many.q.out b/ql/src/test/results/clientpositive/llap/bucket_many.q.out index 72b0ec4..bfc924d 100644 --- a/ql/src/test/results/clientpositive/llap/bucket_many.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_many.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -140,6 +142,53 @@ STAGE PLANS: TotalFiles: 256 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -177,6 +226,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket_many + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket_many select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out index 964d058..53a0314 100644 --- 
a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez1.q.out @@ -132,19 +132,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -152,14 +152,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -169,14 +169,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -217,39 +217,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int), 
value (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -259,11 +259,11 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -272,15 +272,15 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 4 - Statistics: Num rows: 275 Data size: 5121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 308 Data size: 2464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -289,10 +289,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -346,101 +346,105 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) - Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: d - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 3 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -498,101 +502,105 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 1 (CUSTOM_EDGE), Map 2 (CUSTOM_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE), Map 4 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + 
Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 617 Data size: 2468 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 1573 Data size: 12584 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 0 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column 
stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 4 + Reducer 2 Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -680,14 +688,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -697,14 +705,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 266 Data size: 25289 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -756,22 +764,58 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 3 (CUSTOM_EDGE) + Map 2 <- Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 
_col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 242 Data size: 24926 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan alias: tab_part - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -780,67 +824,47 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: tab - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - 
Execution mode: llap - LLAP IO: no inputs Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: tab + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -848,32 +872,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 5121 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 275 Data size: 5121 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 302 Data size: 5633 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 302 Data size: 5633 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 302 Data size: 5633 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -900,22 +909,58 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (CUSTOM_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Map 1 <- Reducer 3 (CUSTOM_EDGE) + Map 2 <- Map 4 (CUSTOM_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: b + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + 
expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 + input vertices: + 1 Reducer 3 + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 298 Data size: 30694 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: llap + LLAP IO: no inputs + Map 2 + Map Operator Tree: + TableScan alias: x - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -924,66 +969,46 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), substr(_col1, 5) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 4 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Reducer 2 + Reducer 3 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -991,32 +1016,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 2476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 133 Data size: 2476 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 4 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 117 Data size: 1404 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: double) Stage: Stage-0 Fetch Operator @@ -1048,14 +1058,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1064,18 +1074,18 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 2 Map 3 - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1100 Data size: 20486 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1085,40 +1095,40 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan - alias: c - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1145,21 +1155,22 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (CUSTOM_EDGE) + Map 1 <- Map 3 (CUSTOM_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: x - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1168,70 +1179,77 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 1 Map 3 + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 617 Data size: 58615 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: y - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1276 Data size: 237336 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1292,14 +1310,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1309,14 +1327,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 
Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1392,14 +1410,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1409,14 +1427,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Reducer 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: double), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 550 Data size: 52250 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1462,60 +1480,66 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP 
IO: no inputs - Map 2 + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 565 Data size: 105090 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1569,19 +1593,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1589,14 +1613,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1606,14 +1630,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1640,93 +1664,100 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 2 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Execution mode: llap - LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: c - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 + Statistics: Num rows: 1168 Data size: 9344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col2 (type: 
int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1751,79 +1782,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 47094 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col2 (type: string) + Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator expressions: _col2 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator Target column: ds (string) - Target Input: b + Target Input: a Partition key expr: ds - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL - Target Vertex: Map 2 + Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE 
Column stats: COMPLETE + Target Vertex: Map 1 Execution mode: llap LLAP IO: no inputs - Map 2 - Map Operator Tree: - TableScan - alias: b - Statistics: Num rows: 500 Data size: 97312 Basic stats: COMPLETE Column stats: PARTIAL - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: key (type: int), value (type: string), ds (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: PARTIAL - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int), _col2 (type: string) - 1 _col0 (type: int), _col2 (type: string) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 0 Map 1 - Statistics: Num rows: 60500 Data size: 12826000 Basic stats: COMPLETE Column stats: PARTIAL - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 60500 Data size: 484000 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false - Statistics: Num rows: 60500 Data size: 484000 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 2 Execution mode: llap - LLAP IO: no inputs + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int), _col2 (type: string) + 1 _col0 (type: int), _col2 (type: string) + outputColumnNames: _col0, _col1, _col4 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 617 Data size: 114762 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1870,14 +1908,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1886,29 +1924,29 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 550 Data size: 
10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1918,10 +1956,10 @@ STAGE PLANS: aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1989,14 +2027,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -2005,29 +2043,29 @@ STAGE PLANS: 1 _col0 (type: int) input vertices: 1 Map 3 - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) 
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
        Reducer 2
@@ -2037,10 +2075,10 @@ STAGE PLANS:
              aggregations: count()
              mode: complete
              outputColumnNames: _col0
-             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
index f6500eb..b6112a6 100644
--- a/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucket_map_join_tez2.q.out
@@ -121,93 +121,100 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 1 <- Map 2 (CUSTOM_EDGE), Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
            TableScan
              alias: a
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (key is not null and value is not null) (type: boolean)
-               Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   keys:
-                     0 _col0 (type: int)
-                     1 _col0 (type: int)
-                   outputColumnNames: _col0, _col1
-                   input vertices:
-                     1 Map 2
-                   Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
-                   Map Join Operator
-                     condition map:
-                          Inner Join 0 to 1
-                     keys:
-                       0 _col1 (type: string)
-                       1 _col1 (type: string)
-                     outputColumnNames: _col0, _col3
-                     input vertices:
-                       1 Map 3
-                     Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE
-                     Select Operator
-                       expressions: _col0 (type: int), _col3 (type: int)
-                       outputColumnNames: _col0, _col1
-                       Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE
-                       File Output Operator
-                         compressed: false
-                         Statistics: Num rows: 605 Data size: 11267 Basic stats: COMPLETE Column stats: NONE
-                         table:
-                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-         Execution mode: llap
-         LLAP IO: no inputs
-       Map 2
-         Map Operator Tree:
-           TableScan
-             alias: c
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-             Filter Operator
-               predicate: key is not null (type: boolean)
-               Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: key (type: int)
-                 outputColumnNames: _col0
-                 Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
-                   key expressions: _col0 (type: int)
+                   key expressions: _col1 (type: string)
                    sort order: +
-                   Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                   Map-reduce partition columns: _col1 (type: string)
+                   Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+                   value expressions: _col0 (type: int)
          Execution mode: llap
          LLAP IO: no inputs
        Map 3
          Map Operator Tree:
            TableScan
              alias: b
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: value is not null (type: boolean)
-               Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col1 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col1 (type: string)
-                   Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: int)
          Execution mode: llap
          LLAP IO: no inputs
+       Map 4
+         Map Operator Tree:
+           TableScan
+             alias: c
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+             Filter Operator
+               predicate: key is not null (type: boolean)
+               Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: key (type: int)
+                 outputColumnNames: _col0
+                 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: int)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: int)
+                   Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
+         Execution mode: llap
+         LLAP IO: no inputs
+       Reducer 2
+         Execution mode: llap
+         Reduce Operator Tree:
+           Merge Join Operator
+             condition map:
+                  Inner Join 0 to 1
+             keys:
+               0 _col1 (type: string)
+               1 _col1 (type: string)
+             outputColumnNames: _col0, _col2
+             Statistics: Num rows: 1168 Data size: 9344 Basic stats: COMPLETE Column stats: COMPLETE
+             Map Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               keys:
+                 0 _col0 (type: int)
+                 1 _col0 (type: int)
+               outputColumnNames: _col0, _col2
+               input vertices:
+                 1 Map 4
+               Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col0 (type: int), _col2 (type: int)
+                 outputColumnNames: _col0, _col1
+                 Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 2979 Data size: 23832 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Fetch Operator
@@ -254,30 +261,30 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Map 2 <- Map 1 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
            TableScan
              alias: a
-             Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: UDFToDouble(_col0) (type: double)
                    sort order: +
                    Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                   Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: int), _col1 (type: string)
          Execution mode: llap
          LLAP IO: no inputs
-       Map 2
+       Map 3
          Map Operator Tree:
            TableScan
              alias: b
@@ -289,29 +296,36 @@ STAGE PLANS:
                  expressions: key (type: string), value (type: string)
                  outputColumnNames: _col0, _col1
                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
-                 Map Join Operator
-                   condition map:
-                        Inner Join 0 to 1
-                   keys:
-                     0 UDFToDouble(_col0) (type: double)
-                     1 UDFToDouble(_col0) (type: double)
-                   outputColumnNames: _col0, _col1, _col3
-                   input vertices:
-                     0 Map 1
-                   Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
-                   Select Operator
-                     expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
-                     outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
-                     File Output Operator
-                       compressed: false
-                       Statistics: Num rows: 550 Data size: 97900 Basic stats: COMPLETE Column stats: NONE
-                       table:
-                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 Reduce Output Operator
+                   key expressions: UDFToDouble(_col0) (type: double)
+                   sort order: +
+                   Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+                   Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
+                   value expressions: _col1 (type: string)
          Execution mode: llap
          LLAP IO: no inputs
+       Reducer 2
+         Execution mode: llap
+         Reduce Operator Tree:
+           Merge Join Operator
+             condition map:
+                  Inner Join 0 to 1
+             keys:
+               0 UDFToDouble(_col0) (type: double)
+               1 UDFToDouble(_col0) (type: double)
+             outputColumnNames: _col0, _col1, _col3
+             Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE
+             Select Operator
+               expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string)
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 590 Data size: 109740 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Fetch Operator
@@ -341,14 +355,14 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((key > 1) and (key > 2)) (type: boolean)
-               Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -358,10 +372,10 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1
                    input vertices:
                      1 Map 2
-                   Statistics: Num rows: 60 Data size: 1126 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 60 Data size: 1126 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -372,19 +386,19 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((key > 2) and (key > 1)) (type: boolean)
-               Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
@@ -416,14 +430,14 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (key > 1) (type: boolean)
-               Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Left Outer Join0 to 1
@@ -433,10 +447,10 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1
                    input vertices:
                      1 Map 2
-                   Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -447,19 +461,19 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((key > 2) and (key > 1)) (type: boolean)
-               Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
@@ -491,33 +505,33 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: ((key > 1) and (key > 2)) (type: boolean)
-               Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 55 Data size: 1024 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 55 Data size: 220 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
        Map 2
          Map Operator Tree:
            TableScan
              alias: tab_part
-             Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: (key > 2) (type: boolean)
-               Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 166 Data size: 3091 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 166 Data size: 664 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Right Outer Join0 to 1
@@ -527,10 +541,10 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1
                    input vertices:
                      0 Map 1
-                   Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 182 Data size: 3400 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 166 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -557,46 +571,59 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE)
+       Map 3 <- Reducer 2 (CUSTOM_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
            TableScan
              alias: tab
-             Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  keys: key (type: int)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
        Map 3
          Map Operator Tree:
            TableScan
              alias: b
-             Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int)
                  outputColumnNames: _col0
-                 Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
-                 Reduce Output Operator
-                   key expressions: _col0 (type: int)
-                   sort order: +
-                   Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                 Map Join Operator
+                   condition map:
+                        Inner Join 0 to 1
+                   keys:
+                     0 _col0 (type: int)
+                     1 _col0 (type: int)
+                   outputColumnNames: _col0, _col1
+                   input vertices:
+                     0 Reducer 2
+                   Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+                   File Output Operator
+                     compressed: false
+                     Statistics: Num rows: 242 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+                     table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          Execution mode: llap
          LLAP IO: no inputs
        Reducer 2
@@ -606,24 +633,12 @@ STAGE PLANS:
              keys: KEY._col0 (type: int)
              mode: mergepartial
              outputColumnNames: _col0
-             Statistics: Num rows: 121 Data size: 2251 Basic stats: COMPLETE Column stats: NONE
-             Map Join Operator
-               condition map:
-                    Inner Join 0 to 1
-               keys:
-                 0 _col0 (type: int)
-                 1 _col0 (type: int)
-               outputColumnNames: _col0, _col1
-               input vertices:
-                 1 Map 3
-               Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
-               File Output Operator
-                 compressed: false
-                 Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
-                 table:
-                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+             Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE
+             Reduce Output Operator
+               key expressions: _col0 (type: int)
+               sort order: +
+               Map-reduce partition columns: _col0 (type: int)
+               Statistics: Num rows: 117 Data size: 468 Basic stats: COMPLETE Column stats: COMPLETE
  Stage: Stage-0
    Fetch Operator
@@ -644,46 +659,47 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE)
+       Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
            TableScan
              alias: tab
-             Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: value is not null (type: boolean)
-               Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  keys: value (type: string)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE
          Execution mode: llap
          LLAP IO: no inputs
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
-             Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: UDFToDouble(_col0) (type: double)
                    sort order: +
                    Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                   Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: string)
          Execution mode: llap
          LLAP IO: no inputs
@@ -694,28 +710,35 @@ STAGE PLANS:
              keys: KEY._col0 (type: string)
              mode: mergepartial
              outputColumnNames: _col0
-             Statistics: Num rows: 121 Data size: 2251 Basic stats: COMPLETE Column stats: NONE
-             Map Join Operator
-               condition map:
-                    Inner Join 0 to 1
-               keys:
-                 0 UDFToDouble(_col0) (type: double)
-                 1 UDFToDouble(_col0) (type: double)
-               outputColumnNames: _col0, _col2
-               input vertices:
-                 1 Map 3
-               Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: _col0 (type: string), _col2 (type: string)
-                 outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+             Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE
+             Reduce Output Operator
+               key expressions: UDFToDouble(_col0) (type: double)
+               sort order: +
+               Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+               Statistics: Num rows: 112 Data size: 10192 Basic stats: COMPLETE Column stats: COMPLETE
+               value expressions: _col0 (type: string)
+       Reducer 3
+         Execution mode: llap
+         Reduce Operator Tree:
+           Merge Join Operator
+             condition map:
+                  Inner Join 0 to 1
+             keys:
+               0 UDFToDouble(_col0) (type: double)
+               1 UDFToDouble(_col0) (type: double)
+             outputColumnNames: _col0, _col2
+             Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE
+             Select Operator
+               expressions: _col0 (type: string), _col2 (type: string)
+               outputColumnNames: _col0, _col1
+               Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 231 Data size: 42042 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
index 6338f66..e07909c 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin1.q.out
@@ -393,13 +393,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -478,7 +480,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin
            Truncated Path -> Alias:
              /srcbucket_mapjoin [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -605,6 +607,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -641,6 +690,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin a join srcbucket_mapjoin_part b
@@ -759,13 +816,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -844,7 +903,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin
            Truncated Path -> Alias:
              /srcbucket_mapjoin [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -950,7 +1009,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                      bucket_count -1
                      column.name.delimiter ,
                      columns key,value1,value2
@@ -971,6 +1030,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -984,7 +1090,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value1,value2
@@ -1007,6 +1113,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin a join srcbucket_mapjoin_part b
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
index 7e3d606..07baff3 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin2.q.out
@@ -105,13 +105,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -192,7 +194,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin_part
            Truncated Path -> Alias:
              /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -319,6 +321,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -355,6 +404,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
@@ -477,13 +534,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -564,7 +623,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin_part
            Truncated Path -> Alias:
              /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -670,7 +729,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                      bucket_count -1
                      column.name.delimiter ,
                      columns key,value1,value2
@@ -691,6 +750,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -704,7 +810,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value1,value2
@@ -727,6 +833,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
@@ -866,13 +980,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -953,7 +1069,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin_part
            Truncated Path -> Alias:
              /srcbucket_mapjoin_part/ds=2008-04-08 [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -1108,7 +1224,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                      bucket_count -1
                      column.name.delimiter ,
                      columns key,value1,value2
@@ -1129,6 +1245,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 63 Data size: 7246 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -1142,7 +1305,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value1,value2
@@ -1165,6 +1328,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
index 00cf0f1..492f581 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin3.q.out
@@ -129,13 +129,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -216,7 +218,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin_part_2
            Truncated Path -> Alias:
              /srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -343,6 +345,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -379,6 +428,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
@@ -501,13 +558,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -588,7 +647,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin_part_2
            Truncated Path -> Alias:
              /srcbucket_mapjoin_part_2/ds=2008-04-08 [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -694,7 +753,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                      bucket_count -1
                      column.name.delimiter ,
                      columns key,value1,value2
@@ -715,6 +774,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -728,7 +834,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value1,value2
@@ -751,6 +857,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
diff --git a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
index 7b983f0..bafb5d7 100644
--- a/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketmapjoin4.q.out
@@ -129,13 +129,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -214,7 +216,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin
            Truncated Path -> Alias:
              /srcbucket_mapjoin [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -339,6 +341,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -375,6 +424,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin a join srcbucket_mapjoin b
@@ -485,13 +542,15 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
-       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+       Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+       Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -570,7 +629,7 @@ STAGE PLANS:
                name: default.srcbucket_mapjoin
            Truncated Path -> Alias:
              /srcbucket_mapjoin [a]
-       Map 3
+       Map 4
          Map Operator Tree:
            TableScan
              alias: b
@@ -674,7 +733,7 @@ STAGE PLANS:
                    input format: org.apache.hadoop.mapred.TextInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                    properties:
-                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                     COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
                      bucket_count -1
                      column.name.delimiter ,
                      columns key,value1,value2
@@ -695,6 +754,53 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                   outputColumnNames: key, value1, value2
+                   Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2
+                     Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+                       auto parallelism: false
+       Reducer 3
+         Execution mode: llap
+         Needs Tagging: false
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+             File Output Operator
+               compressed: false
+               GlobalTableId: 0
+#### A masked pattern was here ####
+               NumFilesPerFileSink: 1
+               Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+               table:
+                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                 properties:
+                   columns _col0,_col1,_col2
+                   columns.types struct:struct:struct
+                   escape.delim \
+                   hive.serialization.extend.additional.nesting.levels true
+                   serialization.escape.crlf true
+                   serialization.format 1
+                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+               TotalFiles: 1
+               GatherStats: false
+               MultiFileSpray: false
  Stage: Stage-2
    Dependency Collection
@@ -708,7 +814,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+             COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
              bucket_count -1
              column.name.delimiter ,
              columns key,value1,value2
@@ -731,6 +837,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value1, value2
+       Column Types: string, string, string
+       Table: default.bucketmapjoin_tmp_result
+       Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(a)*/ a.key, a.value, b.value
from srcbucket_mapjoin a join srcbucket_mapjoin b
diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
index e4fed11..9d44103 100644
--- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_2.q.out
@@ -93,6 +93,7 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
@@ -101,25 +102,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
          Map Operator Tree:
            TableScan
              alias: a
-             Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: string)
          Execution mode: llap
          LLAP IO: no inputs
@@ -127,14 +129,14 @@ STAGE PLANS:
          Map Operator Tree:
            TableScan
              alias: b
-             Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
              Filter Operator
                predicate: key is not null (type: boolean)
-               Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: key (type: int), value (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE
                  Map Join Operator
                    condition map:
                         Inner Join 0 to 1
@@ -144,16 +146,16 @@ STAGE PLANS:
                    outputColumnNames: _col0, _col1, _col4
                    input vertices:
                      0 Map 1
-                   Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
          Execution mode: llap
          LLAP IO: no inputs
@@ -163,15 +165,51 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
              outputColumnNames: _col0, _col1
-             Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
-               Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                  input format: org.apache.hadoop.mapred.TextInputFormat
                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: default.test_table3
+             Select Operator
+               expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+               outputColumnNames: key, value, ds
+               Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE
+               Group By Operator
+                 aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                 keys: ds (type: string)
+                 mode: hash
+                 outputColumnNames: _col0, _col1, _col2
+                 Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE
+                 Reduce Output Operator
+                   key expressions: _col0 (type: string)
+                   sort order: +
+                   Map-reduce partition columns: _col0 (type: string)
+                   Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE
+                   value expressions: _col1 (type: struct), _col2 (type: struct)
+       Reducer 4
+         Execution mode: llap
+         Reduce Operator Tree:
+           Group By Operator
+             aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+             keys: KEY._col0 (type: string)
+             mode: mergepartial
+             outputColumnNames: _col0, _col1, _col2
+             Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+             Select Operator
+               expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+                 table:
+                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-2
    Dependency Collection
@@ -191,6 +229,13 @@ STAGE PLANS:
  Stage: Stage-3
    Stats-Aggr Operator
+ Stage: Stage-4
+   Column Stats Work
+     Column Stats Desc:
+       Columns: key, value
+       Column Types: int, string
+       Table: default.test_table3
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
FROM test_table1 a JOIN test_table2 b
@@ -272,6 +317,7 @@ STAGE DEPENDENCIES:
  Stage-2 depends on stages: Stage-1
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
  Stage: Stage-1
@@ -280,25 +326,26 @@ STAGE PLANS:
      Edges:
        Map 2 <- Map 1 (BROADCAST_EDGE)
        Reducer 3 <- Map 2 (SIMPLE_EDGE)
+       Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
#### A masked pattern was here
#### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 1860 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5540 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 20 Data size: 300 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 5540 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -306,14 +353,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15036 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -323,16 +370,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6405 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -342,15 +389,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 35 Data size: 9555 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -370,6 +453,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -475,6 +565,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -483,25 +574,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ 
-509,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 15792 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 168 Data size: 2816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 168 Data size: 46704 Basic stats: COMPLETE Column stats: PARTIAL Map Join Operator condition map: Inner Join 0 to 1 @@ -526,16 +618,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6405 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -545,15 +637,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 184 Data size: 3097 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 6580 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 35 Data size: 9555 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE 
Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -573,6 +701,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -684,6 +819,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -692,25 +828,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -718,14 +855,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -735,16 +872,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE 
Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -754,15 +891,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -782,6 +955,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -875,6 +1055,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 
depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -883,25 +1064,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -909,14 +1091,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), concat(value, value) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15792 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -926,16 +1108,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 6324 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -945,15 +1127,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 
1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -973,6 +1191,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) FROM @@ -1066,6 +1291,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1074,25 +1300,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort 
order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1100,14 +1327,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1117,16 +1344,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (_col0 + _col0) (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1136,15 +1363,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 3196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 17 Data size: 4641 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), 
_col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1164,6 +1427,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out index 67e925a..3c04d4b 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_6.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -79,25 +80,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,14 +107,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num 
rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -122,16 +124,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -141,15 +143,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 
(type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,6 +207,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key2, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -257,6 +302,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -265,25 +311,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -291,14 +338,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -308,16 +355,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE 
+ Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -327,15 +374,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -355,6 +438,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats 
Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from ( @@ -443,6 +533,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -451,25 +542,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -477,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -494,16 +586,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE 
value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -513,15 +605,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -541,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key2, subq1.key, subq1.value from @@ -564,6 +699,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -572,25 +708,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -598,14 +735,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -615,16 +752,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -634,15 +771,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -662,6 +835,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from @@ -691,6 +871,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -699,25 +880,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), 
_col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -725,14 +907,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -742,16 +924,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -761,15 +943,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -789,6 +1007,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from ( @@ -895,6 +1120,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -903,25 +1129,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -929,14 +1156,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -946,16 +1173,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: +- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -965,15 +1192,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: 
struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -993,6 +1256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from ( @@ -1109,6 +1379,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1117,25 +1388,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 970 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 10 Data size: 171 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1820 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1143,14 +1415,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 8232 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 84 Data size: 1651 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 15372 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1160,16 +1432,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col6 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 187 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), concat(_col2, _col6) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: -- Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1179,15 +1451,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1816 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 1 Data size: 277 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1529 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1525 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1207,3 +1515,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, 
int, string + Table: default.test_table4 + diff --git a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out index 8691d0d..2ca6e9f 100644 --- a/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/llap/bucketsortoptimize_insert_7.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -79,25 +80,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 712 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,14 +107,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 716 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -122,16 +124,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col4 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col4) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE 
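A minimal sketch of the setup assumed to produce the new Column Stats Work stages and the extra Reducer 4 vertices throughout these plans (the settings are not part of the golden output, and the literal 16 passed to compute_stats is read here as the number of bit-vectors for the NDV sketch):

SET hive.stats.column.autogather=true;
-- Any INSERT into the target now chains a stats-gathering pipeline onto the
-- plan; the manual equivalent after the fact would be:
ANALYZE TABLE test_table3 PARTITION (ds = '1') COMPUTE STATISTICS FOR COLUMNS;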
Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -141,15 +143,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -169,6 +207,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -254,6 +299,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -262,25 +308,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: 
(((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -288,14 +335,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (((key = 0) or (key = 5)) and key is not null) (type: boolean) - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -305,16 +352,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -324,15 +371,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 92 Data size: 1548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 
Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -352,6 +435,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -443,6 +533,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -451,25 +542,26 @@ STAGE PLANS: Edges: Map 2 <- Map 1 (BROADCAST_EDGE) Reducer 3 <- Map 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_table1 - Statistics: Num rows: 10 Data size: 150 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -477,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_table2 - Statistics: Num rows: 84 Data size: 1408 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 84 Data size: 
7896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and ((key = 0) or (key = 5))) (type: boolean) - Statistics: Num rows: 28 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 28 Data size: 469 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -494,16 +586,16 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 915 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), concat(_col1, _col3) (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -513,15 +605,51 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 30 Data size: 515 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 5 Data size: 1365 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data 
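One way to verify what the Reducer 4 / Column Stats Work pair persisted for the partition written above (a sketch; the DESCRIBE output is not part of this diff, and the per-partition column form assumes a Hive 2.x-era DESCRIBE):

DESCRIBE FORMATTED test_table3 PARTITION (ds = '1') key;
DESCRIBE FORMATTED test_table3 PARTITION (ds = '1') value;
-- Each should report the min/max/ndv/null counts that compute_stats gathered.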
size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -541,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out b/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out index 4bc28ce..14f9619 100644 --- a/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/cbo_rp_lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 
16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 
1","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[],"vertices":[]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 
-{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery1:_u1-subquery1:p1.key = j-subquery1:_u1-subquery1:t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(j-subquery2:_u1-subquery2:p2.key = j-subquery2:_u1-subquery2:t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(key), 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(value, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = 
p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.emp_id = emd:em:m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12,9],"targets":[0,1,2,3,4,5],"expression":"(emd:em:e.dept_id = emd:d.dept_id AND emd:em:e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 
16)","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(tmp_values_col2), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(tmp_values_col3), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(tmp_values_col4), 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 diff --git a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out index 6d941fd..5d213f6 100644 --- a/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out +++ b/ql/src/test/results/clientpositive/llap/columnStatsUpdateForStatsOptimizer_1.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 1 numRows 3 rawDataSize 21 @@ -79,7 +79,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}} numFiles 1 numRows 3 rawDataSize 21 @@ -115,56 +115,12 @@ PREHOOK: type: 
POSTHOOK: query: explain select max(year) from calendar
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendar
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: year (type: int)
- outputColumnNames: year
- Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(year)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
@@ -280,7 +236,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}}
numFiles 2
numRows 4
rawDataSize 28
@@ -302,56 +258,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select max(year) from calendar
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendar
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: year (type: int)
- outputColumnNames: year
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(year)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
@@ -369,56 +281,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select max(month) from calendar
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendar
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: month (type: int)
- outputColumnNames: month
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(month)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
@@ -459,7 +327,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"month\":\"true\",\"year\":\"true\"}}
numFiles 2
numRows 4
rawDataSize 28
@@ -504,56 +372,12 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select max(month) from calendar
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendar
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: month (type: int)
- outputColumnNames: month
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(month)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
@@ -670,7 +494,7 @@ Database: default
Table: calendarp
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}}
numFiles 1
numRows 3
rawDataSize 12
@@ -692,68 +516,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select max(year) from calendarp where p=1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendarp
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: year (type: int)
- outputColumnNames: year
- Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
- Group By Operator
- aggregations: max(year)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
PREHOOK: query: select max(year) from calendarp where p=1
PREHOOK: type: QUERY
PREHOOK: Input: default@calendarp
-PREHOOK: Input: default@calendarp@p=1
#### A masked pattern was here ####
POSTHOOK: query: select max(year) from calendarp where p=1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@calendarp
-POSTHOOK: Input: default@calendarp@p=1
#### A masked pattern was here ####
2012
PREHOOK: query: analyze table calendarp partition (p=1) compute statistics for columns
@@ -850,7 +628,7 @@ Database: default
Table: calendarp
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"year\":\"true\"}}
numFiles 2
numRows 4
rawDataSize 16
@@ -872,68 +650,22 @@ PREHOOK: type: QUERY
POSTHOOK: query: explain select max(year) from calendarp where p=1
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-0 depends on stages: Stage-1
+ Stage-0 is a root stage
STAGE PLANS:
- Stage: Stage-1
- Tez
-#### A masked pattern was here ####
- Edges:
- Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
-#### A masked pattern was here ####
- Vertices:
- Map 1
- Map Operator Tree:
- TableScan
- alias: calendarp
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: year (type: int)
- outputColumnNames: year
- Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
- Group By Operator
- aggregations: max(year)
- mode: hash
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- Reduce Output Operator
- sort order:
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: int)
- Execution mode: llap
- LLAP IO: no inputs
- Reducer 2
- Execution mode: llap
- Reduce Operator Tree:
- Group By Operator
- aggregations: max(VALUE._col0)
- mode: mergepartial
- outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
Stage: Stage-0
Fetch Operator
- limit: -1
+ limit: 1
Processor Tree:
ListSink
PREHOOK: query: select max(year) from calendarp where p=1
PREHOOK: type: QUERY
PREHOOK: Input: default@calendarp
-PREHOOK: Input: default@calendarp@p=1
#### A masked pattern was here ####
POSTHOOK: query: select max(year) from calendarp where p=1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@calendarp
-POSTHOOK: Input: default@calendarp@p=1
#### A masked pattern was here ####
2015
PREHOOK: query: create table t (key string, value string)
diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
index c56c818..0a98f84 100644
--- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
@@ -406,19 +406,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -431,10 +431,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -538,19 +538,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((UDFToDouble(val) = 3.0) and key is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -564,10 +564,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 374 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -639,19 +639,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: ((UDFToDouble(key) = 6.0) and val is not null) (type: boolean)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 340 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: val (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -664,10 +664,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 187 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -753,38 +753,38 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: t2
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 425 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 5
Map Operator Tree:
TableScan
alias: t3
- Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), val (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 30 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 865 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -798,12 +798,12 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 467 Basic stats: COMPLETE Column stats: NONE
Reducer 3
Execution mode: llap
Reduce Operator Tree:
@@ -814,10 +814,10 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 17 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 513 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
index d55cf30..2e6ede9 100644
--- a/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
+++ b/ql/src/test/results/clientpositive/llap/column_table_stats_orc.q.out
@@ -36,7 +36,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 1
rawDataSize 170
@@ -74,22 +74,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Statistics Aggregation Key Prefix: default.s/
GatherStats: true
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: key, value
- Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: compute_stats(key, 16), compute_stats(value, 16)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order:
sort order:
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
tag: -1
value expressions: _col0 (type: struct), _col1 (type: struct)
auto parallelism: false
@@ -104,7 +104,7 @@ STAGE PLANS:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -125,7 +125,7 @@ STAGE PLANS:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -154,13 +154,13 @@ STAGE PLANS:
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
- Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -365,7 +365,7 @@ STAGE PLANS:
ds 2008-04-08
hr 11
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
#### A masked pattern was here ####
name default.spart
@@ -409,7 +409,7 @@ STAGE PLANS:
ds 2008-04-08
hr 12
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
#### A masked pattern was here ####
name default.spart
@@ -770,7 +770,7 @@ STAGE PLANS:
ds 2008-04-08
hr 11
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
#### A masked pattern was here ####
name default.spart
@@ -974,7 +974,7 @@ Database: default
Table: spart
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 1
rawDataSize 170
diff --git a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
index dc50fb7..4a8f4d4 100644
--- a/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
+++ b/ql/src/test/results/clientpositive/llap/columnstats_part_coltype.q.out
@@ -98,18 +98,18 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int from deserializer
+key int 27 484 0 18 from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=2, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string from deserializer
+value string 0 18 6.8 7 from deserializer
PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr=2, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
@@ -152,18 +152,18 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int from deserializer
+key int 27 495 0 28 from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-02', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string from deserializer
+value string 0 18 6.833333333333333 7 from deserializer
PREHOOK: query: analyze table partcolstats partition (ds=date '2015-04-02', hr, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
@@ -210,36 +210,36 @@ PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int from deserializer
+key int 15 495 0 43 from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partA') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string from deserializer
+value string 0 34 6.825 7 from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') key
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-key int from deserializer
+key int 15 495 0 51 from deserializer
PREHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@partcolstats
POSTHOOK: query: describe formatted partcolstats partition (ds=date '2015-04-03', hr=3, part='partB') value
POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@partcolstats
-# col_name data_type comment
+# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-value string from deserializer
+value string 0 53 6.883333333333334 7 from deserializer
PREHOOK: query: analyze table partcolstats partition (ds, hr, part) compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@partcolstats
diff --git a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
index 4bdb186..037a419 100644
--- a/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/constprog_semijoin.q.out
@@ -62,30 +62,30 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Select Operator [SEL_11] (rows=5 width=22)
+ Select Operator [SEL_11] (rows=1 width=185)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_17] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_17] (rows=1 width=94)
Conds:RS_8._col3=RS_9._col0(Left Semi),Output:["_col0","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:_col3
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=2 width=189)
Output:["_col0","_col2","_col3"]
- Filter Operator [FIL_15] (rows=5 width=20)
+ Filter Operator [FIL_15] (rows=2 width=189)
predicate:((val = 't1val01') and dimid is not null)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
+ TableScan [TS_0] (rows=10 width=189)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0
- Group By Operator [GBY_7] (rows=5 width=3)
+ Group By Operator [GBY_7] (rows=2 width=4)
Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=5 width=3)
+ Select Operator [SEL_5] (rows=5 width=4)
Output:["_col0"]
- Filter Operator [FIL_16] (rows=5 width=3)
+ Filter Operator [FIL_16] (rows=5 width=4)
predicate:id is not null
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id where table1.val = 't1val01'
PREHOOK: type: QUERY
@@ -116,44 +116,44 @@ Stage-0
Stage-1
Reducer 3 llap
File Output Operator [FS_18]
- Select Operator [SEL_17] (rows=5 width=24)
+ Select Operator [SEL_17] (rows=1 width=187)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_28] (rows=5 width=24)
+ Merge Join Operator [MERGEJOIN_28] (rows=1 width=96)
Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"]
<-Map 5 [SIMPLE_EDGE] llap
SHUFFLE [RS_15]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=3 width=10)
+ Select Operator [SEL_8] (rows=3 width=96)
Output:["_col0","_col1"]
- Filter Operator [FIL_26] (rows=3 width=10)
+ Filter Operator [FIL_26] (rows=3 width=96)
predicate:id is not null
- TableScan [TS_6] (rows=3 width=10)
- default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
+ TableScan [TS_6] (rows=3 width=96)
+ default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_27] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_27] (rows=1 width=4)
Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_11]
PartitionCols:_col2
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=2 width=99)
Output:["_col0","_col2"]
- Filter Operator [FIL_24] (rows=5 width=20)
+ Filter Operator [FIL_24] (rows=2 width=99)
predicate:((val = 't1val01') and id is not null and dimid is not null)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"]
+ TableScan [TS_0] (rows=10 width=99)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"]
<-Map 4 [SIMPLE_EDGE] llap
SHUFFLE [RS_12]
PartitionCols:_col0
- Group By Operator [GBY_10] (rows=5 width=3)
+ Group By Operator [GBY_10] (rows=2 width=4)
Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=5 width=3)
+ Select Operator [SEL_5] (rows=5 width=4)
Output:["_col0"]
- Filter Operator [FIL_25] (rows=5 width=3)
+ Filter Operator [FIL_25] (rows=5 width=4)
predicate:id is not null
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 inner join table2 on table1.val = 't1val01' and table1.id = table2.id left semi join table3 on table1.dimid = table3.id
PREHOOK: type: QUERY
@@ -186,44 +186,44 @@ Stage-0
Stage-1
Reducer 3 llap
File Output Operator [FS_18]
- Select Operator [SEL_17] (rows=5 width=24)
+ Select Operator [SEL_17] (rows=1 width=187)
Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_28] (rows=5 width=24)
+ Merge Join Operator [MERGEJOIN_28] (rows=1 width=96)
Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col0","_col4"]
<-Map 5 [SIMPLE_EDGE] llap
SHUFFLE [RS_15]
PartitionCols:_col0
- Select Operator [SEL_8] (rows=3 width=10)
+ Select Operator [SEL_8] (rows=3 width=96)
Output:["_col0","_col1"]
- Filter Operator [FIL_26] (rows=3 width=10)
+ Filter Operator [FIL_26] (rows=3 width=96)
predicate:id is not null
- TableScan [TS_6] (rows=3 width=10)
- default@table2,table2,Tbl:COMPLETE,Col:NONE,Output:["id","val2"]
+ TableScan [TS_6] (rows=3 width=96)
+ default@table2,table2,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val2"]
<-Reducer 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_14]
PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_27] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_27] (rows=1 width=4)
Conds:RS_11._col2=RS_12._col0(Left Semi),Output:["_col0"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_11]
PartitionCols:_col2
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=2 width=99)
Output:["_col0","_col2"]
- Filter Operator [FIL_24] (rows=5 width=20)
+ Filter Operator [FIL_24] (rows=2 width=99)
predicate:((val = 't1val01') and dimid is not null and id is not null)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","dimid"]
+ TableScan [TS_0] (rows=10 width=99)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","dimid"]
<-Map 4 [SIMPLE_EDGE] llap
SHUFFLE [RS_12]
PartitionCols:_col0
- Group By Operator [GBY_10] (rows=5 width=3)
+ Group By Operator [GBY_10] (rows=2 width=4)
Output:["_col0"],keys:_col0
- Select Operator [SEL_5] (rows=5 width=3)
+ Select Operator [SEL_5] (rows=5 width=4)
Output:["_col0"]
- Filter Operator [FIL_25] (rows=5 width=3)
+ Filter Operator [FIL_25] (rows=5 width=4)
predicate:id is not null
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table2.val2 from table1 left semi join table3 on table1.dimid = table3.id inner join table2 on table1.val = 't1val01' and table1.id = table2.id
PREHOOK: type: QUERY
@@ -255,28 +255,28 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=2 width=3)
+ Merge Join Operator [MERGEJOIN_17] (rows=1 width=185)
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:100, true
- Select Operator [SEL_2] (rows=1 width=20)
+ Select Operator [SEL_2] (rows=1 width=193)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_15] (rows=1 width=20)
+ Filter Operator [FIL_15] (rows=1 width=185)
predicate:false
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"]
+ TableScan [TS_0] (rows=10 width=185)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=2 width=3)
+ Group By Operator [GBY_7] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=2 width=3)
+ Select Operator [SEL_5] (rows=1 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=2 width=3)
+ Filter Operator [FIL_16] (rows=1 width=4)
predicate:(id = 100)
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid <> 100
PREHOOK: type: QUERY
@@ -303,28 +303,28 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_17] (rows=3 width=185)
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:100, true
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=3 width=193)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_15] (rows=5 width=20)
+ Filter Operator [FIL_15] (rows=3 width=189)
predicate:(dimid = 100)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
+ TableScan [TS_0] (rows=10 width=189)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=2 width=3)
+ Group By Operator [GBY_7] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=2 width=3)
+ Select Operator [SEL_5] (rows=1 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=2 width=3)
+ Filter Operator [FIL_16] (rows=1 width=4)
predicate:(id = 100)
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid IN (100,200)
PREHOOK: type: QUERY
@@ -353,28 +353,28 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=2 width=3)
+ Merge Join Operator [MERGEJOIN_17] (rows=1 width=185)
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:100, true
- Select Operator [SEL_2] (rows=1 width=20)
+ Select Operator [SEL_2] (rows=1 width=193)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_15] (rows=1 width=20)
+ Filter Operator [FIL_15] (rows=1 width=185)
predicate:false
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1"]
+ TableScan [TS_0] (rows=10 width=185)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=2 width=3)
+ Group By Operator [GBY_7] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=2 width=3)
+ Select Operator [SEL_5] (rows=1 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=2 width=3)
+ Filter Operator [FIL_16] (rows=1 width=4)
predicate:(id = 100)
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 200
PREHOOK: type: QUERY
@@ -401,28 +401,28 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_17] (rows=3 width=185)
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:100, true
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=3 width=193)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_15] (rows=5 width=20)
+ Filter Operator [FIL_15] (rows=3 width=189)
predicate:(dimid = 100)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
+ TableScan [TS_0] (rows=10 width=189)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=2 width=3)
+ Group By Operator [GBY_7] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=2 width=3)
+ Select Operator [SEL_5] (rows=1 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=2 width=3)
+ Filter Operator [FIL_16] (rows=1 width=4)
predicate:(id = 100)
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100 where table1.dimid = 100
PREHOOK: type: QUERY
@@ -451,28 +451,28 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_12]
- Merge Join Operator [MERGEJOIN_17] (rows=5 width=22)
+ Merge Join Operator [MERGEJOIN_17] (rows=3 width=185)
Conds:RS_8.100, true=RS_9._col0, _col1(Left Semi),Output:["_col0","_col1","_col2"]
<-Map 1 [SIMPLE_EDGE] llap
SHUFFLE [RS_8]
PartitionCols:100, true
- Select Operator [SEL_2] (rows=5 width=20)
+ Select Operator [SEL_2] (rows=3 width=193)
Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_15] (rows=5 width=20)
+ Filter Operator [FIL_15] (rows=3 width=189)
predicate:(dimid = 100)
- TableScan [TS_0] (rows=10 width=20)
- default@table1,table1,Tbl:COMPLETE,Col:NONE,Output:["id","val","val1","dimid"]
+ TableScan [TS_0] (rows=10 width=189)
+ default@table1,table1,Tbl:COMPLETE,Col:COMPLETE,Output:["id","val","val1","dimid"]
<-Map 3 [SIMPLE_EDGE] llap
SHUFFLE [RS_9]
PartitionCols:_col0, _col1
- Group By Operator [GBY_7] (rows=2 width=3)
+ Group By Operator [GBY_7] (rows=1 width=8)
Output:["_col0","_col1"],keys:_col0, _col1
- Select Operator [SEL_5] (rows=2 width=3)
+ Select Operator [SEL_5] (rows=1 width=8)
Output:["_col0","_col1"]
- Filter Operator [FIL_16] (rows=2 width=3)
+ Filter Operator [FIL_16] (rows=1 width=4)
predicate:(id = 100)
- TableScan [TS_3] (rows=5 width=3)
- default@table3,table3,Tbl:COMPLETE,Col:NONE,Output:["id"]
+ TableScan [TS_3] (rows=5 width=4)
+ default@table3,table3,Tbl:COMPLETE,Col:COMPLETE,Output:["id"]
PREHOOK: query: select table1.id, table1.val, table1.val1 from table1 left semi join table3 on table1.dimid = table3.id and table3.id = 100
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/cte_5.q.out b/ql/src/test/results/clientpositive/llap/cte_5.q.out
index 58a963b..f3485da 100644
--- a/ql/src/test/results/clientpositive/llap/cte_5.q.out
+++ b/ql/src/test/results/clientpositive/llap/cte_5.q.out
@@ -85,17 +85,17 @@ Stage-0
Stage-1
Reducer 2 llap
File Output Operator [FS_10]
- Select Operator [SEL_9] (rows=2 width=89)
+ Select Operator [SEL_9] (rows=2 width=4)
Output:["_col0"]
- Merge Join Operator [MERGEJOIN_13] (rows=2 width=89)
+ Merge Join Operator [MERGEJOIN_13] (rows=2 width=8)
Conds:(Inner)
<-Map 1 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_6]
- Select Operator [SEL_2] (rows=1 width=3)
- Filter Operator [FIL_11] (rows=1 width=3)
+ Select Operator [SEL_2] (rows=1 width=4)
+ Filter Operator [FIL_11] (rows=1 width=4)
predicate:(UDFToDouble(colnum) = 5.0)
- TableScan [TS_0] (rows=1 width=3)
- mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"]
+ TableScan [TS_0] (rows=1 width=4)
+ mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"]
<-Map 3 [CUSTOM_SIMPLE_EDGE] llap
PARTITION_ONLY_SHUFFLE [RS_7]
Select Operator [SEL_5] (rows=2 width=85)
diff --git a/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out b/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out
index 0c22b72..ceb9bfa 100644
--- a/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out
+++ b/ql/src/test/results/clientpositive/llap/cte_mat_5.q.out
@@ -84,17 +84,17 @@ Stage-3
Stage-4
Reducer 3 llap
File Output Operator [FS_15]
- Merge Join Operator [MERGEJOIN_20] (rows=1 width=3)
+ Merge Join Operator [MERGEJOIN_20] (rows=1 width=4)
Conds:RS_11.UDFToDouble(_col0)=RS_12.UDFToDouble(_col0)(Inner),Output:["_col0"]
<-Map 2 [SIMPLE_EDGE] llap
SHUFFLE [RS_11]
PartitionCols:UDFToDouble(_col0)
- Select Operator [SEL_7] (rows=1 width=3)
+ Select Operator [SEL_7] (rows=1 width=4)
Output:["_col0"]
- Filter Operator [FIL_18] (rows=1 width=3)
+ Filter Operator [FIL_18] (rows=1 width=4)
predicate:colnum is not null
- TableScan [TS_5] (rows=1 width=3)
- mydb@q1,a,Tbl:COMPLETE,Col:NONE,Output:["colnum"]
+ TableScan [TS_5] (rows=1 width=4)
+ mydb@q1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["colnum"]
<-Map 4 [SIMPLE_EDGE] llap
SHUFFLE [RS_12]
PartitionCols:UDFToDouble(_col0)
diff --git a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
index 98ba6af..ac9e4fc 100644
--- a/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
+++ b/ql/src/test/results/clientpositive/llap/deleteAnalyze.q.out
@@ -48,6 +48,7 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
+ COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}}
numFiles 1
totalSize 578
#### A masked pattern was here ####
@@ -71,7 +72,8 @@ POSTHOOK: type: DESCTABLE
POSTHOOK: Input: default@testdeci2
# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment
-amount decimal(10,3) from deserializer
+amount decimal(10,3) 12.123 123.123 0 2 from deserializer
+COLUMN_STATS_ACCURATE {\"COLUMN_STATS\":{\"amount\":\"true\",\"id\":\"true\",\"item\":\"true\",\"sales_tax\":\"true\"}}
PREHOOK: query: analyze table testdeci2 compute statistics for columns
PREHOOK: type: QUERY
PREHOOK: Input: default@testdeci2
diff --git a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out
index ca03652..86cbfb0 100644
--- a/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out
+++ b/ql/src/test/results/clientpositive/llap/disable_merge_for_bucketing.q.out
@@ -19,6 +19,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -140,6 +141,41 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -177,6 +213,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket2_1
+ Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucket2_1
select * from src
PREHOOK: type: QUERY
@@ -211,18 +255,18 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (((hash(key) & 2147483647) % 2) = 0) (type: boolean)
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -232,10 +276,10 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
index 8950b70..617864f 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_3.q.out
@@ -29,6 +29,8 @@ STAGE DEPENDENCIES:
Stage-5 depends on stages: Stage-4
Stage-0 depends on stages: Stage-5
Stage-6 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+ Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
Stage-2 depends on stages: Stage-5
Stage-7 depends on stages: Stage-2
Stage-3 depends on stages: Stage-5
@@ -41,13 +43,14 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 1 <- Reducer 8 (BROADCAST_EDGE)
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Map 1 <- Reducer 9 (BROADCAST_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
- Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+ Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -66,7 +69,7 @@ STAGE PLANS:
value expressions: ROW__ID (type: struct)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: s
@@ -215,6 +218,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(val, 16)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 6
Execution mode: llap
Reduce Operator Tree:
@@ -231,7 +254,35 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
- Reducer 8
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -260,6 +311,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: val + Column Types: int + Table: default.merge_tmp_table + Stage: Stage-2 Move Operator tables: @@ -310,16 +375,18 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Reducer 5 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Reducer 6 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -337,7 +404,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Execution mode: llap LLAP IO: may be used (ACID table) - Map 4 + Map 5 Map Operator Tree: TableScan alias: s @@ -402,7 +469,35 @@ STAGE PLANS: serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.acidtbl Write Type: INSERT - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -431,6 +526,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, int + Table: default.acidtbl + PREHOOK: query: explain merge into acidTbl as t using ( select * from nonAcidOrcTbl where a > 0 union all @@ 
@@ -454,6 +556,8 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
   Stage-2 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-5
@@ -467,14 +571,15 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Map 1 <- Union 2 (CONTAINS)
-        Map 10 <- Reducer 8 (BROADCAST_EDGE)
-        Map 9 <- Union 2 (CONTAINS)
-        Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
+        Map 10 <- Union 2 (CONTAINS)
+        Map 11 <- Reducer 9 (BROADCAST_EDGE)
+        Reducer 3 <- Map 11 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE)
         Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 5 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 6 <- Reducer 3 (SIMPLE_EDGE)
         Reducer 7 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
-        Reducer 8 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+        Reducer 9 <- Union 2 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -514,22 +619,6 @@ STAGE PLANS:
         Map 10 
             Map Operator Tree:
                 TableScan
-                  alias: t
-                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                  Filter Operator
-                    predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl_a_min) AND DynamicValue(RS_10_nonacidorctbl_a_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl_a_bloom_filter))) (type: boolean)
-                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                    Reduce Output Operator
-                      key expressions: a (type: int)
-                      sort order: +
-                      Map-reduce partition columns: a (type: int)
-                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                      value expressions: ROW__ID (type: struct)
-            Execution mode: llap
-            LLAP IO: may be used (ACID table)
-        Map 9 
-            Map Operator Tree:
-                TableScan
                   alias: nonacidorctbl
                   filterExpr: (b > 0) (type: boolean)
                   Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
@@ -561,6 +650,22 @@ STAGE PLANS:
                         value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary)
             Execution mode: llap
             LLAP IO: all inputs
+        Map 11 
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: (a BETWEEN DynamicValue(RS_10_nonacidorctbl_a_min) AND DynamicValue(RS_10_nonacidorctbl_a_max) and in_bloom_filter(a, DynamicValue(RS_10_nonacidorctbl_a_bloom_filter))) (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: a (type: int)
+                      sort order: +
+                      Map-reduce partition columns: a (type: int)
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      value expressions: ROW__ID (type: struct)
+            Execution mode: llap
+            LLAP IO: may be used (ACID table)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -684,6 +789,26 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.merge_tmp_table
+                  Select Operator
+                    expressions: _col0 (type: int)
+                    outputColumnNames: val
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(val, 16)
+                      mode: complete
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: struct)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
         Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
@@ -700,10 +825,38 @@ STAGE PLANS:
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.acidtbl
                   Write Type: INSERT
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int)
+                    outputColumnNames: a, b
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(a, 16), compute_stats(b, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 8 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 9 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=4)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
@@ -731,6 +884,20 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: a, b
+          Column Types: int, int
+          Table: default.acidtbl
+
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: val
+          Column Types: int
+          Table: default.merge_tmp_table
+
   Stage: Stage-2
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
index 453711c..76ae92b 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_vectorization.q.out
@@ -160,6 +160,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -173,19 +174,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), _col0 (type: smallint)
                         sort order: ++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -195,11 +196,11 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -225,6 +226,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10
@@ -234,6 +242,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -248,20 +257,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Limit
                         Number of rows: 10
-                        Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           sort order: 
-                          Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                          TopN Hash Memory Usage: 0.1
                           value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: vectorized, llap
@@ -272,15 +281,15 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col4 (type: tinyint)
                     sort order: +
                     Map-reduce partition columns: _col4 (type: tinyint)
-                    Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
             Execution mode: vectorized, llap
@@ -288,11 +297,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -318,6 +327,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_limit_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
@@ -327,6 +343,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -340,19 +357,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), '_bucket_number' (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -362,11 +379,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -391,6 +408,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_buck_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
@@ -400,6 +424,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -413,19 +438,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
                         sort order: +++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -435,11 +460,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -464,6 +489,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_buck_sort_orc
+
 PREHOOK: query: insert overwrite table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_orc
@@ -541,6 +573,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -554,19 +587,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), _col0 (type: smallint)
                         sort order: ++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -576,11 +609,11 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -606,6 +639,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_orc
+
 PREHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert into table over1k_part_limit_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 limit 10
@@ -615,6 +655,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -629,20 +670,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Limit
                         Number of rows: 10
-                        Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           sort order: 
-                          Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                           TopN Hash Memory Usage: 0.1
                           value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
             Execution mode: vectorized, llap
@@ -653,15 +694,15 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), VALUE._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 10
-                  Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col4 (type: tinyint)
                     sort order: +
                     Map-reduce partition columns: _col4 (type: tinyint)
-                    Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
             Execution mode: vectorized, llap
@@ -669,11 +710,11 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Dp Sort State: PARTITION_SORTED
-                 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -699,6 +740,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_limit_orc
+
 PREHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert into table over1k_part_buck_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
@@ -708,6 +756,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -721,19 +770,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), '_bucket_number' (type: string)
                         sort order: ++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -743,11 +792,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -772,6 +821,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_buck_orc
+
 PREHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert into table over1k_part_buck_sort_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
@@ -781,6 +837,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -794,19 +851,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
                         sort order: +++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -816,11 +873,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -845,6 +902,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_buck_sort_orc
+
 PREHOOK: query: insert into table over1k_part_orc partition(ds="foo", t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by si
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_orc
@@ -1324,6 +1388,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1331,42 +1396,79 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan
                  alias: over1k_orc
-                 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (t is null or (t = 27)) (type: boolean)
-                   Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                     Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
-                       Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2 
-           Execution mode: vectorized, llap
+           Execution mode: llap
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: bigint), VALUE._col2 (type: float), VALUE._col3 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.over1k_part2_orc
+               Select Operator
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+                 outputColumnNames: si, i, b, f, ds, t
+                 Statistics: Num rows: 18 Data size: 1998 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16)
+                   keys: ds (type: string), t (type: tinyint)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                   Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     key expressions: _col0 (type: string), _col1 (type: tinyint)
+                     sort order: ++
+                     Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+                     Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+       Reducer 3 
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                 Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -1387,6 +1489,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part2_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i
@@ -1396,6 +1505,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1409,19 +1519,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), _col1 (type: int)
                         sort order: ++
                         Map-reduce partition columns: _col4 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: smallint), _col2 (type: bigint), _col3 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -1431,11 +1541,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), KEY._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1461,6 +1571,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part2_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from (select * from over1k_orc order by i limit 10) tmp where t is null or t=27
@@ -1470,6 +1587,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1484,15 +1602,15 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float)
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col2 (type: int)
                       sort order: +
-                      Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                       TopN Hash Memory Usage: 0.1
                       value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float)
             Execution mode: vectorized, llap
@@ -1503,22 +1621,22 @@ STAGE PLANS:
              Select Operator
                expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), KEY.reducesinkkey0 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 10
-                 Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (_col0 is null or (_col0 = 27)) (type: boolean)
-                   Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                     Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col4 (type: tinyint)
                       sort order: +
                        Map-reduce partition columns: _col4 (type: tinyint)
-                       Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float)
         Reducer 3 
             Execution mode: vectorized, llap
@@ -1526,11 +1644,11 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), VALUE._col3 (type: float), KEY._col4 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_SORTED
-                  Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 10 Data size: 240 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1556,6 +1674,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part2_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t
@@ -1565,6 +1690,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1572,48 +1698,85 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan
                  alias: over1k_orc
-                 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (t is null or (t = 27)) (type: boolean)
-                   Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                     Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                        sort order: +++++
                        Map-reduce partition columns: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
-                       Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2 
-           Execution mode: vectorized, llap
+           Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.TextInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.over1k_part2_orc
+                Select Operator
+                  expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint)
+                  outputColumnNames: si, i, b, f, ds, t
+                  Statistics: Num rows: 9 Data size: 999 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16)
+                    keys: ds (type: string), t (type: tinyint)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: tinyint)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint)
+                      Statistics: Num rows: 2 Data size: 3990 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+       Reducer 3 
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+               keys: KEY._col0 (type: string), KEY._col1 (type: tinyint)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+               Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                 Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 2 Data size: 4022 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -1634,6 +1797,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part2_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 group by si,i,b,f,t
@@ -1643,6 +1813,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -1656,20 +1827,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float)
                         sort order: +++++
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
         Reducer 2 
@@ -1679,15 +1850,15 @@ STAGE PLANS:
                keys: KEY._col0 (type: tinyint), KEY._col1 (type: smallint), KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: float)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col0 (type: tinyint)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_SORTED
-                   Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 9 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -1713,6 +1884,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part2_orc
+
 PREHOOK: query: insert overwrite table over1k_part2_orc partition(ds="foo",t) select si,i,b,f,t from over1k_orc where t is null or t=27 order by i
 PREHOOK: type: QUERY
 PREHOOK: Input: default@over1k_orc
@@ -1755,7 +1933,7 @@ Database: default
 Table: over1k_part2_orc
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}}
 	numFiles	1
 	numRows	16
 	rawDataSize	415
@@ -1797,7 +1975,7 @@ Database: default
 Table: over1k_part2_orc
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}}
 	numFiles	1
 	numRows	3
 	rawDataSize	78
@@ -2034,6 +2212,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2041,43 +2220,80 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan
                  alias: over1k_orc
-                 Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (t is null or (t = 27)) (type: boolean)
-                   Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                     Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col3 (type: float)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: smallint)
-                       Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col4 (type: tinyint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2 
-           Execution mode: vectorized, llap
+           Execution mode: llap
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY.reducesinkkey0 (type: float), VALUE._col3 (type: tinyint)
                outputColumnNames: _col0, _col1, _col2, _col3, _col4
-               Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
               File Output Operator
                 compressed: false
-                Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                 table:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                     name: default.over1k_part_buck_sort2_orc
+               Select Operator
+                 expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint)
+                 outputColumnNames: si, i, b, f, t
+                 Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
+                 Group By Operator
+                   aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16)
+                   keys: t (type: tinyint)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                   Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE
+                   Reduce Output Operator
+                     key expressions: _col0 (type: tinyint)
+                     sort order: +
+                     Map-reduce partition columns: _col0 (type: tinyint)
+                     Statistics: Num rows: 2 Data size: 3816 Basic stats: COMPLETE Column stats: COMPLETE
+                     value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+       Reducer 3 
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+               keys: KEY._col0 (type: tinyint)
+               mode: mergepartial
+               outputColumnNames: _col0, _col1, _col2, _col3, _col4
+               Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE
+               Select Operator
+                 expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint)
+                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                 Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE
+                 File Output Operator
+                   compressed: false
+                   Statistics: Num rows: 2 Data size: 3848 Basic stats: COMPLETE Column stats: COMPLETE
+                   table:
+                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -2097,6 +2313,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: si, i, b, f
+          Column Types: smallint, int, bigint, float
+          Table: default.over1k_part_buck_sort2_orc
+
 PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
 PREHOOK: type: QUERY
 POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27
@@ -2106,6 +2329,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -2119,19 +2343,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: over1k_orc
-                  Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1049 Data size: 25160 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: (t is null or (t = 27)) (type: boolean)
-                    Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: si (type: smallint), i (type: int), b (type: bigint), f (type: float), t (type: tinyint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col4 (type: tinyint), '_bucket_number' (type: string), _col3 (type: float)
                         sort order: +++
                         Map-reduce partition columns: _col4 (type: tinyint)
Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2141,11 +2365,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: smallint), VALUE._col1 (type: int), VALUE._col2 (type: bigint), KEY._col3 (type: float), KEY._col4 (type: tinyint), KEY.'_bucket_number' (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number' - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 1048 Data size: 310873 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 18 Data size: 2196 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -2170,6 +2394,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2_orc + PREHOOK: query: insert overwrite table over1k_part_buck_sort2_orc partition(t) select si,i,b,f,t from over1k_orc where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k_orc @@ -2211,7 +2442,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2252,7 +2483,7 @@ Database: default Table: over1k_part_buck_sort2_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78
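Up to this point every hunk makes the same pair of changes: each INSERT plan gains a trailing Column Stats Work stage fed by a compute_stats aggregation, and the COLUMN_STATS_ACCURATE partition parameter gains a per-column map. That matches the plan shape Hive emits when column statistics are gathered automatically as part of the insert. A minimal sketch of reproducing one of the plans above, assuming a session where autogathering is enabled via the hive.stats.column.autogather setting (table and predicate are taken from the test itself):

    set hive.stats.column.autogather=true;
    explain insert overwrite table over1k_part_buck_sort2_orc partition(t)
    select si,i,b,f,t from over1k_orc where t is null or t=27;

With the setting off, the plan ends at the Stats-Aggr Operator stage; with it on, the extra stage consumes the compute_stats output and persists the column statistics to the metastore.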
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out index 1dc9ed5..3229b18 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization.q.out @@ -117,6 +117,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -182,6 +183,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part + PREHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 @@ -191,6 +199,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -275,6 +284,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit + PREHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -284,6 +300,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -348,6 +365,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -357,6 +381,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -421,6 +446,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort + PREHOOK: query: insert overwrite table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -498,6 +530,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -563,6 +596,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part + PREHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_limit partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 limit 10 @@ -572,6 +612,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -656,6 +697,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_limit + PREHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -665,6 +713,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on 
stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -729,6 +778,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck + PREHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert into table over1k_part_buck_sort partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -738,6 +794,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -802,6 +859,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort + PREHOOK: query: insert into table over1k_part partition(ds="foo", t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -1281,6 +1345,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1288,6 +1353,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1324,6 +1390,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1344,6 +1446,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i @@ -1353,6 +1462,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1418,6 +1528,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from (select * from over1k order by i limit 10) tmp where t is null or t=27 @@ -1427,6 +1544,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1513,6 +1631,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t @@ -1522,6 +1647,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1529,6 +1655,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1571,6 +1698,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), 'foo' (type: string), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, ds, t + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), 
compute_stats(f, 16) + keys: ds (type: string), t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint) + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1110 Data size: 26640 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1591,6 +1754,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 group by si,i,b,f,t @@ -1600,6 +1770,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1670,6 +1841,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part2 + PREHOOK: query: insert overwrite table over1k_part2 partition(ds="foo",t) select si,i,b,f,t from over1k where t is null or t=27 order by i PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -1712,7 +1890,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -1754,7 +1932,7 @@ Database: default Table: over1k_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -1991,6 +2169,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1998,6 +2177,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2035,6 +2215,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part_buck_sort2 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: bigint), _col3 (type: float), _col4 (type: tinyint) + outputColumnNames: si, i, b, f, t + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(i, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: t (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 4442 Data size: 106611 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2221 Data size: 53305 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2054,6 +2270,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 + PREHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 @@ -2063,6 +2286,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2127,6 +2351,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + 
Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, i, b, f + Column Types: smallint, int, bigint, float + Table: default.over1k_part_buck_sort2 + PREHOOK: query: insert overwrite table over1k_part_buck_sort2 partition(t) select si,i,b,f,t from over1k where t is null or t=27 PREHOOK: type: QUERY PREHOOK: Input: default@over1k @@ -2168,7 +2399,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 16 rawDataSize 415 @@ -2209,7 +2440,7 @@ Database: default Table: over1k_part_buck_sort2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\",\"f\":\"true\",\"i\":\"true\",\"si\":\"true\"}} numFiles 1 numRows 3 rawDataSize 78 @@ -2438,6 +2669,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2504,6 +2736,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 @@ -2513,6 +2752,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2579,6 +2819,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 @@ -2588,6 +2835,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2654,6 +2902,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 @@ -2663,6 +2918,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2729,6 +2985,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + 
Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and s="foo" @@ -2738,6 +3001,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2804,6 +3068,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where t=27 and s="foo" @@ -2813,6 +3084,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -2879,6 +3151,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" PREHOOK: type: QUERY POSTHOOK: query: explain insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where i=100 and t=27 and s="foo" @@ -2888,11 +3167,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -2914,8 +3197,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.over1k_part3 + Select Operator + expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: float), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int) + outputColumnNames: si, b, f, s, t, i + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(si, 16), compute_stats(b, 16), compute_stats(f, 16) + keys: s (type: string), t (type: tinyint), i (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + Statistics: Num rows: 107 Data size: 13282 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + keys: KEY._col0 (type: string), KEY._col1 (type: tinyint), KEY._col2 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: string), _col1 (type: tinyint), _col2 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 53 Data size: 6578 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2937,6 +3256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: si, b, f + Column Types: smallint, bigint, float + Table: default.over1k_part3 + PREHOOK: query: insert overwrite table over1k_part3 partition(s,t,i) select si,b,f,s,t,i from over1k where s="foo" PREHOOK: type: QUERY PREHOOK: Input: default@over1k
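In the dynamic-partition plans, the compute_stats aggregation is keyed by the partition columns (t, or ds and t, or s, t and i above; ss_sold_date_sk in the file that follows), so one statistics row is produced per written partition. The resulting metastore state should match gathering the statistics by hand after the load; a sketch for one partition of a table used above (the partition values follow the test data, so treat them as illustrative):

    analyze table over1k_part2 partition (ds='foo', t=27) compute statistics for columns;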
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out index d4811d6..1bafedc 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization2.q.out @@ -77,6 +77,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -146,6 +147,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -320,6 +328,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -384,6 +393,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -558,6 +574,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -607,6 +624,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -626,6 +664,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -679,7 +724,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -739,7 +784,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -800,13 +845,14 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -822,7 +868,8 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) @@ -843,6 +890,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ss_part + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -862,6 +930,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part + PREHOOK: query: insert overwrite table ss_part partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -909,7 +984,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 151 @@ -969,7 +1044,7 @@ Database: default Table: ss_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 186 @@ -1091,6 +1166,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1104,42 +1180,63 @@ STAGE PLANS: Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: ss_sold_date_sk (type: int), ss_net_paid_inc_tax (type: float), ss_net_profit (type: float) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: float), _col2 (type: float) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: int), KEY._col1 (type: float), KEY._col2 (type: float) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: float), _col2 (type: float), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1159,6 +1256,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc + PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -1212,7 +1316,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1272,7 +1376,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1333,49 +1437,72 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: ss_orc - Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((ss_sold_date_sk >= 2452617) and (ss_sold_date_sk <= 2452638)) (type: boolean) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_net_paid_inc_tax (type: float), ss_net_profit (type: float), 
ss_sold_date_sk (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - sort order: + key expressions: _col2 (type: int) + sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: float), VALUE._col1 (type: float), VALUE._col2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.ss_part_orc + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: int) + outputColumnNames: ss_net_paid_inc_tax, ss_net_profit, ss_sold_date_sk + Statistics: Num rows: 8 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(ss_net_paid_inc_tax, 16), compute_stats(ss_net_profit, 16) + keys: ss_sold_date_sk (type: int) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 956 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1395,6 +1522,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ss_net_paid_inc_tax, ss_net_profit + Column Types: float, float + Table: default.ss_part_orc + PREHOOK: query: insert overwrite table ss_part_orc partition (ss_sold_date_sk) select ss_net_paid_inc_tax, ss_net_profit, @@ -1442,7 +1576,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 11 rawDataSize 88 @@ -1502,7 +1636,7 @@ Database: default Table: ss_part_orc #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ss_net_paid_inc_tax\":\"true\",\"ss_net_profit\":\"true\"}} numFiles 1 numRows 13 rawDataSize 104 @@ -1601,6 +1735,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1608,6 +1743,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1634,7 +1770,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1654,6 +1790,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1673,6 +1845,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 + PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select key k1, @@ -1731,6 +1910,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -1738,6 +1918,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1764,7 +1945,7 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Operator Tree: Group By Operator aggregations: 
count(VALUE._col0) @@ -1784,6 +1965,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.hive13_dp1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: k1, k2, day + Statistics: Num rows: 205 Data size: 19475 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + keys: day (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1039 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1047 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -1803,6 +2020,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.hive13_dp1 + PREHOOK: query: insert overwrite table `hive13_dp1` partition(`day`) select key k1,
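The ACID test file below shows the consumer side of the same change: its plans gain no new stages, but the optimizer's Statistics annotations shift (for example, Num rows: 800 with Column stats: NONE becomes Num rows: 8 with Column stats: PARTIAL) because filter selectivity can now be estimated from the gathered column statistics. Which columns have trustworthy statistics can be read back from the same COLUMN_STATS_ACCURATE parameter the earlier hunks extend; a sketch against one of the partitions created above (the partition value is taken from the test's filter, so treat it as illustrative):

    describe formatted ss_part partition (ss_sold_date_sk=2452617);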
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out index 788854a..7dbd639 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out @@ -92,19 +92,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL Execution mode: llap LLAP IO: may be used (ACID table) Reducer 2 @@ -113,10 +113,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 2752 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -185,19 +185,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part - Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 147200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 2168 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct), ds (type: string) outputColumnNames: _col0, _col3 - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort order: + Map-reduce partition columns: UDFToInteger(_col0) (type: int) - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL value expressions: _col3 (type: string) Execution mode: llap LLAP IO: may be used (ACID table) @@ -207,10 +207,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false - Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 8 Data size: 3472 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -372,19 +372,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: acid_part_sdpo - Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Filter Operator predicate: (key = 'foo') (type: boolean) - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ROW__ID (type: struct) outputColumnNames: _col0 - Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: struct) sort 
order: +
                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                       Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -393,10 +393,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3
-                 Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1600 Data size: 550400 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -465,19 +465,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_part_sdpo
-                 Statistics: Num rows: 1600 Data size: 312400 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 800 Data size: 147200 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 1600 Data size: 433600 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), ds (type: string)
                      outputColumnNames: _col0, _col3
-                     Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 1600 Data size: 694400 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col3 (type: string), '_bucket_number' (type: string), _col0 (type: struct)
                        sort order: +++
                        Map-reduce partition columns: _col3 (type: string)
-                       Statistics: Num rows: 800 Data size: 347200 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 1600 Data size: 694400 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -486,11 +486,11 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY.'_bucket_number' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3, '_bucket_number'
-                 Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 1600 Data size: 572800 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_BUCKET_SORTED
-                   Statistics: Num rows: 800 Data size: 286400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 1600 Data size: 572800 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -661,19 +661,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part
-                 Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 8 Data size: 696 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: struct)
                        sort order: +
                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                       Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -682,10 +682,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 8 Data size: 2784 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -755,19 +755,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part
-                 Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 1600 Data size: 6400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 16 Data size: 1456 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), hr (type: int)
                      outputColumnNames: _col0, _col4
-                     Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: struct)
                        sort order: +
                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                       Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col4 (type: int)
            Execution mode: llap
            LLAP IO: may be used (ACID table)
@@ -777,10 +777,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), VALUE._col2 (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 16 Data size: 5568 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -879,19 +879,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part
-                 Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 892800 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (value = 'bar') (type: boolean)
-                   Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 15 Data size: 4185 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), ds (type: string), hr (type: int)
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: struct)
                        sort order: +
                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                       Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col1 (type: string), _col2 (type: int)
            Execution mode: llap
            LLAP IO: may be used (ACID table)
@@ -901,10 +901,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: struct), VALUE._col0 (type: string), VALUE._col1 (type: int)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 15 Data size: 3960 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1073,19 +1073,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part_sdpo
-                 Statistics: Num rows: 1600 Data size: 30800 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1600 Data size: 139200 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col0 (type: struct)
                        sort order: +
                        Map-reduce partition columns: UDFToInteger(_col0) (type: int)
-                       Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -1094,10 +1094,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), 11 (type: int)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 800 Data size: 15400 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1167,19 +1167,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part_sdpo
-                 Statistics: Num rows: 3200 Data size: 48800 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 1600 Data size: 6400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 291200 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), hr (type: int)
                      outputColumnNames: _col0, _col4
-                     Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 3200 Data size: 1113600 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: '2008-04-08' (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct)
                        sort order: ++++
                        Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
-                       Statistics: Num rows: 1600 Data size: 556800 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 3200 Data size: 1113600 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -1188,11 +1188,11 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY._col0 (type: struct), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                 Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 870400 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_BUCKET_SORTED
-                   Statistics: Num rows: 1600 Data size: 435200 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 870400 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1291,19 +1291,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part_sdpo
-                 Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (value = 'bar') (type: boolean)
-                   Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), ds (type: string), hr (type: int)
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 3200 Data size: 844800 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col1 (type: string), _col2 (type: int), '_bucket_number' (type: string), _col0 (type: struct)
                        sort order: ++++
                        Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
-                       Statistics: Num rows: 1600 Data size: 422400 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 3200 Data size: 844800 Basic stats: COMPLETE Column stats: PARTIAL
            Execution mode: llap
            LLAP IO: may be used (ACID table)
        Reducer 2
@@ -1312,11 +1312,11 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY._col0 (type: struct), KEY._col1 (type: string), KEY._col2 (type: int), KEY.'_bucket_number' (type: string)
                  outputColumnNames: _col0, _col1, _col2, '_bucket_number'
-                 Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 1158400 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_BUCKET_SORTED
-                   Statistics: Num rows: 1600 Data size: 579200 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 1158400 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1485,19 +1485,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part_sdpo_no_cp
-                 Statistics: Num rows: 1600 Data size: 318800 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 1600 Data size: 440000 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 800 Data size: 150400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 1600 Data size: 440000 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int)
                      outputColumnNames: _col0, _col1, _col3, _col4
-                     Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 1600 Data size: 700800 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct)
                        sort order: ++++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
-                       Statistics: Num rows: 800 Data size: 280800 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 1600 Data size: 700800 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col1 (type: string), 'bar' (type: string)
            Execution mode: llap
            LLAP IO: may be used (ACID table)
@@ -1507,11 +1507,11 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                 Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 1600 Data size: 857600 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_BUCKET_SORTED
-                   Statistics: Num rows: 800 Data size: 359200 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 1600 Data size: 857600 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1581,19 +1581,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: acid_2l_part_sdpo_no_cp
-                 Statistics: Num rows: 3200 Data size: 637600 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL
                  Filter Operator
                    predicate: (key = 'foo') (type: boolean)
-                   Statistics: Num rows: 1600 Data size: 300800 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 880000 Basic stats: COMPLETE Column stats: PARTIAL
                    Select Operator
                      expressions: ROW__ID (type: struct), key (type: string), ds (type: string), hr (type: int)
                      outputColumnNames: _col0, _col1, _col3, _col4
-                     Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 3200 Data size: 1401600 Basic stats: COMPLETE Column stats: PARTIAL
                      Reduce Output Operator
                        key expressions: _col3 (type: string), _col4 (type: int), '_bucket_number' (type: string), _col0 (type: struct)
                        sort order: ++++
                        Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
-                       Statistics: Num rows: 1600 Data size: 561600 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 3200 Data size: 1401600 Basic stats: COMPLETE Column stats: PARTIAL
                        value expressions: _col1 (type: string), 'bar' (type: string)
            Execution mode: llap
            LLAP IO: may be used (ACID table)
@@ -1603,11 +1603,11 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY._col0 (type: struct), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY.'_bucket_number' (type: string)
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, '_bucket_number'
-                 Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 3200 Data size: 1715200 Basic stats: COMPLETE Column stats: PARTIAL
                  File Output Operator
                    compressed: false
                    Dp Sort State: PARTITION_BUCKET_SORTED
-                   Statistics: Num rows: 1600 Data size: 718400 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 3200 Data size: 1715200 Basic stats: COMPLETE Column stats: PARTIAL
                    table:
                        input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                        output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
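All of the ACID hunks above follow one pattern: once column statistics exist for these tables, operator annotations move from Column stats: NONE to PARTIAL, and filtered row counts drop from the blind 50% default (800 of 1600, 1600 of 3200) to stats-derived estimates such as 8, 15, or 16. A minimal sketch of the statement shape these plans correspond to, assuming the hive.stats.column.autogather setting that this golden-file update appears to exercise; the table name and literals here are illustrative, not copied from the tests:

    -- illustrative only: an ACID update whose EXPLAIN output has the shape shown above;
    -- with column stats available, the optimizer can estimate the selectivity of (key = 'foo')
    SET hive.stats.column.autogather=true;
    EXPLAIN
    UPDATE acid_2l_part SET value = 'bar' WHERE key = 'foo';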
diff --git a/ql/src/test/results/clientpositive/llap/except_distinct.q.out b/ql/src/test/results/clientpositive/llap/except_distinct.q.out
index 94125b5..dadb657 100644
--- a/ql/src/test/results/clientpositive/llap/except_distinct.q.out
+++ b/ql/src/test/results/clientpositive/llap/except_distinct.q.out
@@ -735,42 +735,42 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: value (type: int)
                    outputColumnNames: value
-                   Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: value (type: int)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 5 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: key
-                   Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: key (type: int)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -780,32 +780,32 @@ STAGE PLANS:
                  keys: KEY._col0 (type: int)
                  mode: mergepartial
                  outputColumnNames: _col0
-                 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: count(2)
                    keys: _col0 (type: int)
                    mode: complete
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: int), 2 (type: bigint), _col1 (type: bigint)
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint)
                        outputColumnNames: _col0, _col1, _col2
-                       Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                        Group By Operator
                          aggregations: sum(_col1), sum(_col2)
                          keys: _col0 (type: int)
                          mode: hash
                          outputColumnNames: _col0, _col1, _col2
-                         Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                          Reduce Output Operator
                            key expressions: _col0 (type: int)
                            sort order: +
                            Map-reduce partition columns: _col0 (type: int)
-                           Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                            value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Reducer 4
            Execution mode: llap
@@ -815,17 +815,17 @@ STAGE PLANS:
                  keys: KEY._col0 (type: int)
                  mode: mergepartial
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: ((_col1 > 0) and ((_col1 * 2) = _col2)) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
-                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -837,32 +837,32 @@ STAGE PLANS:
                  keys: KEY._col0 (type: int)
                  mode: mergepartial
                  outputColumnNames: _col0
-                 Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: count(1)
                    keys: _col0 (type: int)
                    mode: complete
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: int), 1 (type: bigint), _col1 (type: bigint)
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
                        expressions: _col0 (type: int), _col2 (type: bigint), (_col1 * _col2) (type: bigint)
                        outputColumnNames: _col0, _col1, _col2
-                       Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                        Group By Operator
                          aggregations: sum(_col1), sum(_col2)
                          keys: _col0 (type: int)
                          mode: hash
                          outputColumnNames: _col0, _col1, _col2
-                         Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                          Reduce Output Operator
                            key expressions: _col0 (type: int)
                            sort order: +
                            Map-reduce partition columns: _col0 (type: int)
-                           Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                            value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Union 3
            Vertex: Union 3
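The except_distinct plan above is Hive's rewrite of EXCEPT DISTINCT: the left branch is tagged with constant weight 2 (count(2), then _col1 * _col2), the right branch with weight 1, the branches are unioned and re-aggregated per key, and the filter ((_col1 > 0) and ((_col1 * 2) = _col2)) passes exactly the keys with at least one left-side row and zero right-side rows, since 2*(a+b) = 2a+b forces b = 0. The query shape being planned, reconstructed from the aliases and columns visible in the plan:

    -- rows in a (by value) that never appear in b (by key);
    -- planned as the tag-union-aggregate pipeline shown above
    SELECT value FROM a
    EXCEPT DISTINCT
    SELECT key FROM b;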
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
index 584c3b5..80afa88 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_1.q.out
@@ -45,21 +45,39 @@ POSTHOOK: query: explain insert overwrite table src_orc_merge_test_part partitio
 POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
-Stage-3
-  Stats-Aggr Operator
-    Stage-0
-      Move Operator
-        table:{"name:":"default.src_orc_merge_test_part"}
-        Stage-2
-          Dependency Collection{}
-            Stage-1
-              Map 1 llap
-              File Output Operator [FS_3]
-                table:{"name:":"default.src_orc_merge_test_part"}
-                Select Operator [SEL_1] (rows=500 width=95)
-                  Output:["_col0","_col1"]
-                  TableScan [TS_0] (rows=500 width=178)
-                    default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-4
+  Column Stats Work{}
+    Stage-3
+      Stats-Aggr Operator
+        Stage-0
+          Move Operator
+            table:{"name:":"default.src_orc_merge_test_part"}
+            Stage-2
+              Dependency Collection{}
+                Stage-1
+                  Reducer 2 llap
+                  File Output Operator [FS_6]
+                    Select Operator [SEL_5] (rows=1 width=1157)
+                      Output:["_col0","_col1","_col2","_col3"]
+                      Group By Operator [GBY_4] (rows=1 width=1157)
+                        Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1
+                      <-Map 1 [SIMPLE_EDGE] llap
+                        File Output Operator [FS_3]
+                          table:{"name:":"default.src_orc_merge_test_part"}
+                          Select Operator [SEL_1] (rows=500 width=95)
+                            Output:["_col0","_col1"]
+                            TableScan [TS_0] (rows=500 width=178)
+                              default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                        SHUFFLE [RS_3]
+                          PartitionCols:_col0, _col1
+                          Group By Operator [GBY_2] (rows=1 width=1165)
+                            Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"],keys:ds, ts
+                            Select Operator [SEL_1] (rows=500 width=292)
+                              Output:["key","value","ds","ts"]
+                               Please refer to the previous Select Operator [SEL_1]
 
 PREHOOK: query: insert overwrite table src_orc_merge_test_part partition(ds='2012-01-03', ts='2012-01-03+14:46:31') select * from src
 PREHOOK: type: QUERY
@@ -79,32 +97,48 @@ Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
 
-Stage-3
-  Stats-Aggr Operator
-    Stage-0
-      Move Operator
-        table:{"name:":"default.src_orc_merge_test_part"}
-        Stage-2
-          Dependency Collection{}
-            Stage-1
-              Reducer 2 llap
-              File Output Operator [FS_7]
-                table:{"name:":"default.src_orc_merge_test_part"}
-                Select Operator [SEL_6] (rows=100 width=95)
-                  Output:["_col0","_col1"]
-                  Limit [LIM_5] (rows=100 width=178)
-                    Number of rows:100
-                    Select Operator [SEL_4] (rows=100 width=178)
-                      Output:["_col0","_col1"]
-                    <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
-                      PARTITION_ONLY_SHUFFLE [RS_3]
-                        Limit [LIM_2] (rows=100 width=178)
-                          Number of rows:100
-                          Select Operator [SEL_1] (rows=500 width=178)
+Stage-4
+  Column Stats Work{}
+    Stage-3
+      Stats-Aggr Operator
+        Stage-0
+          Move Operator
+            table:{"name:":"default.src_orc_merge_test_part"}
+            Stage-2
+              Dependency Collection{}
+                Stage-1
+                  Reducer 3 llap
+                  File Output Operator [FS_6]
+                    Select Operator [SEL_5] (rows=1 width=1157)
+                      Output:["_col0","_col1","_col2","_col3"]
+                      Group By Operator [GBY_4] (rows=1 width=1157)
+                        Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"],keys:KEY._col0, KEY._col1
+                      <-Reducer 2 [SIMPLE_EDGE] llap
+                        File Output Operator [FS_7]
+                          table:{"name:":"default.src_orc_merge_test_part"}
+                          Select Operator [SEL_6] (rows=100 width=95)
                             Output:["_col0","_col1"]
-                            TableScan [TS_0] (rows=500 width=178)
-                              default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                            Limit [LIM_5] (rows=100 width=178)
+                              Number of rows:100
+                              Select Operator [SEL_4] (rows=100 width=178)
+                                Output:["_col0","_col1"]
+                              <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
+                                PARTITION_ONLY_SHUFFLE [RS_3]
+                                  Limit [LIM_2] (rows=100 width=178)
+                                    Number of rows:100
+                                    Select Operator [SEL_1] (rows=500 width=178)
+                                      Output:["_col0","_col1"]
+                                      TableScan [TS_0] (rows=500 width=178)
+                                        default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                        SHUFFLE [RS_3]
+                          PartitionCols:_col0, _col1
+                          Group By Operator [GBY_2] (rows=1 width=1165)
+                            Output:["_col0","_col1","_col2","_col3"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"],keys:ds, ts
+                            Select Operator [SEL_1] (rows=100 width=292)
+                              Output:["key","value","ds","ts"]
+                               Please refer to the previous Select Operator [SEL_6]
 
 PREHOOK: query: explain select count(1) from src_orc_merge_test_part where ds='2012-01-03' and ts='2012-01-03+14:46:31'
 PREHOOK: type: QUERY
@@ -137,10 +171,10 @@ Stage-0
             PARTITION_ONLY_SHUFFLE [RS_5]
               Group By Operator [GBY_4] (rows=1 width=16)
                 Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"]
-                Select Operator [SEL_2] (rows=500 width=102)
+                Select Operator [SEL_2] (rows=500 width=95)
                   Output:["_col0","_col1"]
-                  TableScan [TS_0] (rows=500 width=102)
-                    default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                  TableScan [TS_0] (rows=500 width=95)
+                    default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: alter table src_orc_merge_test_part partition (ds='2012-01-03', ts='2012-01-03+14:46:31') concatenate
 PREHOOK: type: ALTER_PARTITION_MERGE
@@ -196,10 +230,10 @@ Stage-0
             PARTITION_ONLY_SHUFFLE [RS_5]
               Group By Operator [GBY_4] (rows=1 width=16)
                 Output:["_col0","_col1"],aggregations:["sum(_col0)","sum(_col1)"]
-                Select Operator [SEL_2] (rows=500 width=102)
+                Select Operator [SEL_2] (rows=500 width=95)
                   Output:["_col0","_col1"]
-                  TableScan [TS_0] (rows=500 width=102)
-                    default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                  TableScan [TS_0] (rows=500 width=95)
+                    default@src_orc_merge_test_part,src_orc_merge_test_part,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: drop table src_orc_merge_test_part
 PREHOOK: type: DROPTABLE
@@ -3130,10 +3164,10 @@ Stage-0
                 Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"]
               <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
                 PARTITION_ONLY_SHUFFLE [RS_3]
-                  Select Operator [SEL_1] (rows=5 width=6)
+                  Select Operator [SEL_1] (rows=5 width=93)
                     Output:["_col0","_col1"]
-                    TableScan [TS_0] (rows=5 width=6)
-                      default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                    TableScan [TS_0] (rows=5 width=93)
+                      default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: alter table tgt_rc_merge_test concatenate
 PREHOOK: type: ALTER_TABLE_MERGE
@@ -3202,10 +3236,10 @@ Stage-0
                 Output:["_col0","_col1"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"]
               <-Map 1 [CUSTOM_SIMPLE_EDGE] llap
                 PARTITION_ONLY_SHUFFLE [RS_3]
-                  Select Operator [SEL_1] (rows=5 width=6)
+                  Select Operator [SEL_1] (rows=5 width=93)
                     Output:["_col0","_col1"]
-                    TableScan [TS_0] (rows=5 width=6)
-                      default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+                    TableScan [TS_0] (rows=5 width=93)
+                      default@tgt_rc_merge_test,tgt_rc_merge_test,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: drop table src_rc_merge_test
 PREHOOK: type: DROPTABLE
@@ -3849,20 +3883,20 @@ Stage-0
     Stage-1
       Reducer 2 llap
       File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=28 width=7)
+        Select Operator [SEL_5] (rows=56 width=16)
           Output:["_col0","_col1","_col2","_col3"]
-          Merge Join Operator [MERGEJOIN_7] (rows=28 width=7)
+          Merge Join Operator [MERGEJOIN_7] (rows=56 width=16)
            Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"]
          <-Map 1 [SIMPLE_EDGE] llap
            SHUFFLE [RS_2]
              PartitionCols:key
              TableScan [TS_0] (rows=26 width=7)
-               default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
          <-Map 3 [SIMPLE_EDGE] llap
            SHUFFLE [RS_3]
              PartitionCols:key
              TableScan [TS_1] (rows=26 width=7)
-               default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key AND a.value <=> b.value
 PREHOOK: type: QUERY
@@ -3879,20 +3913,20 @@ Stage-0
     Stage-1
      Reducer 2 llap
      File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=28 width=7)
+        Select Operator [SEL_5] (rows=4 width=16)
          Output:["_col0","_col1","_col2","_col3"]
-          Merge Join Operator [MERGEJOIN_7] (rows=28 width=7)
+          Merge Join Operator [MERGEJOIN_7] (rows=4 width=16)
            Conds:RS_2.key, value=RS_3.key, value(Inner),Output:["_col0","_col1","_col5","_col6"]
          <-Map 1 [SIMPLE_EDGE] llap
            SHUFFLE [RS_2]
              PartitionCols:key, value
              TableScan [TS_0] (rows=26 width=7)
-               default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
          <-Map 3 [SIMPLE_EDGE] llap
            SHUFFLE [RS_3]
              PartitionCols:key, value
              TableScan [TS_1] (rows=26 width=7)
-               default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select /*+ MAPJOIN(a) */ * FROM smb_input1 a RIGHT OUTER JOIN smb_input1 b ON a.key <=> b.key
 PREHOOK: type: QUERY
@@ -3909,20 +3943,20 @@ Stage-0
     Stage-1
      Reducer 2 llap
      File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=28 width=7)
+        Select Operator [SEL_5] (rows=56 width=16)
          Output:["_col0","_col1","_col2","_col3"]
-          Merge Join Operator [MERGEJOIN_7] (rows=28 width=7)
+          Merge Join Operator [MERGEJOIN_7] (rows=56 width=16)
            Conds:RS_2.key=RS_3.key(Right Outer),Output:["_col0","_col1","_col5","_col6"]
          <-Map 1 [SIMPLE_EDGE] llap
            SHUFFLE [RS_2]
              PartitionCols:key
              TableScan [TS_0] (rows=26 width=7)
-               default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
          <-Map 3 [SIMPLE_EDGE] llap
            SHUFFLE [RS_3]
              PartitionCols:key
              TableScan [TS_1] (rows=26 width=7)
-               default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a JOIN smb_input1 b ON a.key <=> b.key
 PREHOOK: type: QUERY
@@ -3939,20 +3973,20 @@ Stage-0
     Stage-1
      Reducer 2 llap
      File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=28 width=7)
+        Select Operator [SEL_5] (rows=56 width=16)
          Output:["_col0","_col1","_col2","_col3"]
-          Merge Join Operator [MERGEJOIN_7] (rows=28 width=7)
+          Merge Join Operator [MERGEJOIN_7] (rows=56 width=16)
            Conds:RS_2.key=RS_3.key(Inner),Output:["_col0","_col1","_col5","_col6"]
          <-Map 1 [SIMPLE_EDGE] llap
            SHUFFLE [RS_2]
              PartitionCols:key
              TableScan [TS_0] (rows=26 width=7)
-               default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
          <-Map 3 [SIMPLE_EDGE] llap
            SHUFFLE [RS_3]
              PartitionCols:key
              TableScan [TS_1] (rows=26 width=7)
-               default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: explain select /*+ MAPJOIN(b) */ * FROM smb_input1 a LEFT OUTER JOIN smb_input1 b ON a.key <=> b.key
 PREHOOK: type: QUERY
@@ -3969,20 +4003,20 @@ Stage-0
     Stage-1
      Reducer 2 llap
      File Output Operator [FS_6]
-        Select Operator [SEL_5] (rows=28 width=7)
+        Select Operator [SEL_5] (rows=56 width=16)
          Output:["_col0","_col1","_col2","_col3"]
-          Merge Join Operator [MERGEJOIN_7] (rows=28 width=7)
+          Merge Join Operator [MERGEJOIN_7] (rows=56 width=16)
            Conds:RS_2.key=RS_3.key(Left Outer),Output:["_col0","_col1","_col5","_col6"]
          <-Map 1 [SIMPLE_EDGE] llap
            SHUFFLE [RS_2]
              PartitionCols:key
              TableScan [TS_0] (rows=26 width=7)
-               default@smb_input1,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
          <-Map 3 [SIMPLE_EDGE] llap
            SHUFFLE [RS_3]
              PartitionCols:key
              TableScan [TS_1] (rows=26 width=7)
-               default@smb_input1,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"]
+               default@smb_input1,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 
 PREHOOK: query: drop table sales
 PREHOOK: type: DROPTABLE
@@ -4930,66 +4964,102 @@ Plan not optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE)
 Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 6 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE)
 
-Stage-4
-  Stats-Aggr Operator
-    Stage-0
-      Move Operator
-        table:{"name:":"default.part_4"}
-        Stage-3
-          Dependency Collection{}
-            Stage-2
-              Reducer 3 llap
-              File Output Operator [FS_9]
-                table:{"name:":"default.part_4"}
-                Select Operator [SEL_7] (rows=26 width=239)
-                  Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
-                  PTF Operator [PTF_6] (rows=26 width=499)
-                    Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
-                    Select Operator [SEL_5] (rows=26 width=499)
-                      Output:["_col1","_col2","_col5","_col7"]
-                    <-Reducer 2 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_4]
-                        PartitionCols:_col2
-                        PTF Operator [PTF_3] (rows=26 width=499)
-                          Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}]
-                          Select Operator [SEL_2] (rows=26 width=499)
-                            Output:["_col1","_col2","_col5","_col7"]
-                          <-Map 1 [SIMPLE_EDGE] llap
-                            SHUFFLE [RS_1]
-                              PartitionCols:p_mfgr
-                              TableScan [TS_0] (rows=26 width=231)
-                                default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"]
-              Reducer 5 llap
-              File Output Operator [FS_20]
-                table:{"name:":"default.part_5"}
-                Select Operator [SEL_17] (rows=26 width=247)
-                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
-                  PTF Operator [PTF_16] (rows=26 width=499)
-                    Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}]
-                    Select Operator [SEL_15] (rows=26 width=499)
-                      Output:["_col0","_col2","_col3","_col6"]
-                    <-Reducer 4 [SIMPLE_EDGE] llap
-                      SHUFFLE [RS_14]
-                        PartitionCols:_col2
-                        Select Operator [SEL_13] (rows=26 width=491)
-                          Output:["sum_window_0","_col1","_col2","_col5"]
-                          PTF Operator [PTF_12] (rows=26 width=491)
-                            Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
-                            Select Operator [SEL_11] (rows=26 width=491)
-                              Output:["_col1","_col2","_col5"]
-                            <-Reducer 2 [SIMPLE_EDGE] llap
-                              SHUFFLE [RS_10]
-                                PartitionCols:_col2
-                                 Please refer to the previous PTF Operator [PTF_3]
-Stage-5
-  Stats-Aggr Operator
-    Stage-1
-      Move Operator
-        table:{"name:":"default.part_5"}
-         Please refer to the previous Stage-3
+Stage-6
+  Column Stats Work{}
+    Stage-4
+      Stats-Aggr Operator
+        Stage-0
+          Move Operator
+            table:{"name:":"default.part_4"}
+            Stage-3
+              Dependency Collection{}
+                Stage-2
+                  Reducer 5 llap
+                  File Output Operator [FS_7]
+                    Group By Operator [GBY_5] (rows=1 width=2880)
+                      Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)"]
+                    <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap
+                      PARTITION_ONLY_SHUFFLE [RS_4]
+                        Group By Operator [GBY_3] (rows=1 width=2888)
+                          Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)","compute_stats(VALUE._col6, 16)"]
+                        <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
+                          File Output Operator [FS_9]
+                            table:{"name:":"default.part_4"}
+                            Select Operator [SEL_7] (rows=26 width=239)
+                              Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
+                              PTF Operator [PTF_6] (rows=26 width=499)
+                                Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}]
+                                Select Operator [SEL_5] (rows=26 width=499)
+                                  Output:["_col1","_col2","_col5","_col7"]
+                                <-Reducer 2 [SIMPLE_EDGE] llap
+                                  SHUFFLE [RS_4]
+                                    PartitionCols:_col2
+                                    PTF Operator [PTF_3] (rows=26 width=499)
+                                      Function definitions:[{},{"Partition table definition":{"name:":"noop","order by:":"_col1 ASC NULLS FIRST","partition by:":"_col2"}}]
+                                      Select Operator [SEL_2] (rows=26 width=499)
+                                        Output:["_col1","_col2","_col5","_col7"]
+                                      <-Map 1 [SIMPLE_EDGE] llap
+                                        SHUFFLE [RS_1]
+                                          PartitionCols:p_mfgr
+                                          TableScan [TS_0] (rows=26 width=231)
+                                            default@part,part,Tbl:COMPLETE,Col:COMPLETE,Output:["p_mfgr","p_name","p_retailprice","p_size"]
+                          PARTITION_ONLY_SHUFFLE [RS_2]
+                            PartitionCols:rand()
+                            Select Operator [SEL_1] (rows=26 width=239)
+                              Output:["p_mfgr","p_name","p_size","r","dr","s"]
+                               Please refer to the previous Select Operator [SEL_7]
+                  Reducer 9 llap
+                  File Output Operator [FS_7]
+                    Group By Operator [GBY_5] (rows=1 width=3840)
+                      Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)","compute_stats(VALUE._col5)","compute_stats(VALUE._col6)","compute_stats(VALUE._col7)"]
+                    <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap
+                      PARTITION_ONLY_SHUFFLE [RS_4]
+                        Group By Operator [GBY_3] (rows=1 width=3840)
+                          Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)","compute_stats(VALUE._col6, 16)","compute_stats(VALUE._col7, 16)","compute_stats(VALUE._col8, 16)"]
+                        <-Reducer 7 [CUSTOM_SIMPLE_EDGE] llap
+                          File Output Operator [FS_20]
+                            table:{"name:":"default.part_5"}
+                            Select Operator [SEL_17] (rows=26 width=247)
+                              Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"]
+                              PTF Operator [PTF_16] (rows=26 width=499)
+                                Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col3 ASC NULLS FIRST, _col2 ASC NULLS FIRST","partition by:":"_col3"}]
+                                Select Operator [SEL_15] (rows=26 width=499)
+                                  Output:["_col0","_col2","_col3","_col6"]
+                                <-Reducer 6 [SIMPLE_EDGE] llap
+                                  SHUFFLE [RS_14]
+                                    PartitionCols:_col2
+                                    Select Operator [SEL_13] (rows=26 width=491)
+                                      Output:["sum_window_0","_col1","_col2","_col5"]
+                                      PTF Operator [PTF_12] (rows=26 width=491)
+                                        Function definitions:[{},{"name:":"windowingtablefunction","order by:":"_col5 ASC NULLS FIRST","partition by:":"_col2"}]
+                                        Select Operator [SEL_11] (rows=26 width=491)
+                                          Output:["_col1","_col2","_col5"]
+                                        <-Reducer 2 [SIMPLE_EDGE] llap
+                                          SHUFFLE [RS_10]
+                                            PartitionCols:_col2
+                                             Please refer to the previous PTF Operator [PTF_3]
+                          PARTITION_ONLY_SHUFFLE [RS_2]
+                            PartitionCols:rand()
+                            Select Operator [SEL_1] (rows=26 width=247)
+                              Output:["p_mfgr","p_name","p_size","s2","r","dr","cud","fv1"]
+                               Please refer to the previous Select Operator [SEL_17]
+    Stage-5
+      Stats-Aggr Operator
+        Stage-1
+          Move Operator
+            table:{"name:":"default.part_5"}
+             Please refer to the previous Stage-3
+Stage-7
+  Column Stats Work{}
+   Please refer to the previous Stage-4
+   Please refer to the previous Stage-5
 
 PREHOOK: query: explain select p_mfgr, p_name,
@@ -5345,41 +5415,58 @@ POSTHOOK: type: QUERY
 Plan optimized by CBO.
 
 Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
 
-Stage-3
-  Stats-Aggr Operator
-    Stage-0
-      Move Operator
-        table:{"name:":"default.dest_j1"}
-        Stage-2
-          Dependency Collection{}
-            Stage-1
-              Reducer 2 llap
-              File Output Operator [FS_11]
-                table:{"name:":"default.dest_j1"}
-                Select Operator [SEL_9] (rows=1219 width=95)
-                  Output:["_col0","_col1"]
-                  Merge Join Operator [MERGEJOIN_16] (rows=1219 width=178)
-                    Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
-                  <-Map 1 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_6]
-                      PartitionCols:_col0
-                      Select Operator [SEL_2] (rows=500 width=87)
-                        Output:["_col0"]
-                        Filter Operator [FIL_14] (rows=500 width=87)
-                          predicate:key is not null
-                          TableScan [TS_0] (rows=500 width=87)
-                            default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
-                  <-Map 3 [SIMPLE_EDGE] llap
-                    SHUFFLE [RS_7]
-                      PartitionCols:_col0
-                      Select Operator [SEL_5] (rows=500 width=178)
-                        Output:["_col0","_col1"]
-                        Filter Operator [FIL_15] (rows=500 width=178)
-                          predicate:key is not null
-                          TableScan [TS_3] (rows=500 width=178)
-                            default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+Stage-4
+  Column Stats Work{}
+    Stage-3
+      Stats-Aggr Operator
+        Stage-0
+          Move Operator
+            table:{"name:":"default.dest_j1"}
+            Stage-2
+              Dependency Collection{}
+                Stage-1
+                  Reducer 4 llap
+                  File Output Operator [FS_7]
+                    Group By Operator [GBY_5] (rows=1 width=960)
+                      Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+                    <-Reducer 3 [CUSTOM_SIMPLE_EDGE] llap
+                      PARTITION_ONLY_SHUFFLE [RS_4]
+                        Group By Operator [GBY_3] (rows=1 width=968)
+                          Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"]
+                        <-Reducer 2 [CUSTOM_SIMPLE_EDGE] llap
+                          File Output Operator [FS_11]
+                            table:{"name:":"default.dest_j1"}
+                            Select Operator [SEL_9] (rows=1219 width=95)
+                              Output:["_col0","_col1"]
+                              Merge Join Operator [MERGEJOIN_16] (rows=1219 width=178)
+                                Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col2"]
+                              <-Map 1 [SIMPLE_EDGE] llap
+                                SHUFFLE [RS_6]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_2] (rows=500 width=87)
+                                    Output:["_col0"]
+                                    Filter Operator [FIL_14] (rows=500 width=87)
+                                      predicate:key is not null
+                                      TableScan [TS_0] (rows=500 width=87)
+                                        default@src,src1,Tbl:COMPLETE,Col:COMPLETE,Output:["key"]
+                              <-Map 5 [SIMPLE_EDGE] llap
+                                SHUFFLE [RS_7]
+                                  PartitionCols:_col0
+                                  Select Operator [SEL_5] (rows=500 width=178)
+                                    Output:["_col0","_col1"]
+                                    Filter Operator [FIL_15] (rows=500 width=178)
+                                      predicate:key is not null
+                                      TableScan [TS_3] (rows=500 width=178)
+                                        default@src,src2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
+                          PARTITION_ONLY_SHUFFLE [RS_2]
+                            PartitionCols:rand()
+                            Select Operator [SEL_1] (rows=1219 width=95)
+                              Output:["key","value"]
+                               Please refer to the previous Select Operator [SEL_9]
 
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 select src1.key, src2.value
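The explainuser_1.q.out changes above are uniform: each INSERT plan gains a Column Stats Work stage and a reducer pair that folds every written column through compute_stats, after which later scans of the written tables flip from Col:NONE to Col:COMPLETE with corrected widths. Conceptually, the appended branch evaluates an aggregation like the following, assuming Hive's compute_stats UDAF with the 16-bitvector argument shown in the plans (the actual rewrite is internal to the planner, so this is only a sketch):

    -- roughly what the auto-added stats branch computes for a partitioned insert:
    -- one NDV/min/max/null-count summary struct per column, per partition key group
    SELECT ds, ts, compute_stats(key, 16), compute_stats(value, 16)
    FROM src_orc_merge_test_part
    GROUP BY ds, ts;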
diff --git a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
index c291473..38c2da1 100644
--- a/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
+++ b/ql/src/test/results/clientpositive/llap/explainuser_2.q.out
@@ -2926,6 +2926,9 @@ STAGE DEPENDENCIES:
  Stage-4 depends on stages: Stage-3
  Stage-0 depends on stages: Stage-4
  Stage-5 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-9 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-5, Stage-6, Stage-7
  Stage-1 depends on stages: Stage-4
  Stage-6 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-4
@@ -2936,19 +2939,22 @@ STAGE PLANS:
    Tez
#### A masked pattern was here ####
      Edges:
-        Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
-        Map 12 <- Union 9 (CONTAINS)
-        Map 13 <- Union 9 (CONTAINS)
-        Map 16 <- Map 17 (BROADCAST_EDGE)
-        Map 18 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
-        Map 19 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
-        Map 20 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
-        Map 21 <- Map 16 (BROADCAST_EDGE), Union 4 (CONTAINS)
-        Map 5 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS)
-        Map 8 <- Union 9 (CONTAINS)
-        Reducer 10 <- Map 14 (SIMPLE_EDGE), Union 9 (SIMPLE_EDGE)
-        Reducer 11 <- Map 15 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE), Union 4 (CONTAINS)
-        Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Map 1 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS)
+        Map 11 <- Union 12 (CONTAINS)
+        Map 15 <- Union 12 (CONTAINS)
+        Map 16 <- Union 12 (CONTAINS)
+        Map 19 <- Map 20 (BROADCAST_EDGE)
+        Map 21 <- Map 19 (BROADCAST_EDGE), Union 4 (CONTAINS)
+        Map 22 <- Map 19 (BROADCAST_EDGE), Union 4 (CONTAINS)
+        Map 23 <- Map 19 (BROADCAST_EDGE), Union 4 (CONTAINS)
+        Map 24 <- Map 19 (BROADCAST_EDGE), Union 4 (CONTAINS)
+        Map 8 <- Map 9 (BROADCAST_EDGE), Union 2 (CONTAINS)
+        Reducer 13 <- Map 17 (SIMPLE_EDGE), Union 12 (SIMPLE_EDGE)
+        Reducer 14 <- Map 18 (SIMPLE_EDGE), Reducer 13 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS)
+        Reducer 5 <- Union 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Union 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Union 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -2971,7 +2977,7 @@ STAGE PLANS:
                        1 _col1 (type: string)
                      outputColumnNames: _col1
                      input vertices:
-                       1 Map 6
+                       1 Map 9
                      Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
@@ -2980,7 +2986,46 @@ STAGE PLANS:
                        Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
            Execution mode: llap
            LLAP IO: no inputs
-        Map 12
+        Map 10
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: string)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 11
+            Map Operator Tree:
+                TableScan
+                  alias: src1
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 15
            Map Operator Tree:
                TableScan
                  alias: src
@@ -2999,7 +3044,7 @@ STAGE PLANS:
                        Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
            Execution mode: llap
            LLAP IO: no inputs
-        Map 13
+        Map 16
            Map Operator Tree:
                TableScan
                  alias: src
@@ -3018,7 +3063,7 @@ STAGE PLANS:
                        Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE
            Execution mode: llap
            LLAP IO: no inputs
-        Map 14
+        Map 17
            Map Operator Tree:
                TableScan
                  alias: x
@@ -3038,7 +3083,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 15
+        Map 18
            Map Operator Tree:
                TableScan
                  alias: y
@@ -3058,7 +3103,7 @@ STAGE PLANS:
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 16
+        Map 19
            Map Operator Tree:
                TableScan
                  alias: x
@@ -3078,7 +3123,7 @@ STAGE PLANS:
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col1, _col3
                      input vertices:
-                       1 Map 17
+                       1 Map 20
                      Statistics: Num rows: 27 Data size: 210 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
@@ -3106,7 +3151,7 @@ STAGE PLANS:
                        value expressions: _col0 (type: string), _col3 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 17
+        Map 20
            Map Operator Tree:
                TableScan
                  alias: y
@@ -3126,7 +3171,7 @@ STAGE PLANS:
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 18
+        Map 21
            Map Operator Tree:
                TableScan
                  alias: src1
@@ -3146,7 +3191,7 @@ STAGE PLANS:
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col3
                      input vertices:
-                       0 Map 16
+                       0 Map 19
                      Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: string), _col3 (type: string)
@@ -3160,6 +3205,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.a
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3168,6 +3226,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.b
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3176,9 +3247,22 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.c
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 19
+        Map 22
            Map Operator Tree:
                TableScan
                  alias: src
@@ -3198,7 +3282,7 @@ STAGE PLANS:
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col3
                      input vertices:
-                       0 Map 16
+                       0 Map 19
                      Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: string), _col3 (type: string)
@@ -3212,6 +3296,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.a
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3220,6 +3317,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.b
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3228,9 +3338,22 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.c
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 20
+        Map 23
            Map Operator Tree:
                TableScan
                  alias: src
@@ -3250,7 +3373,7 @@ STAGE PLANS:
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col3
                      input vertices:
-                       0 Map 16
+                       0 Map 19
                      Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: string), _col3 (type: string)
@@ -3264,6 +3387,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.a
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3272,6 +3408,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.b
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3280,9 +3429,22 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.c
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 21
+        Map 24
            Map Operator Tree:
                TableScan
                  alias: src
@@ -3302,7 +3464,7 @@ STAGE PLANS:
                        1 _col0 (type: string)
                      outputColumnNames: _col0, _col3
                      input vertices:
-                       0 Map 16
+                       0 Map 19
                      Statistics: Num rows: 1677 Data size: 17739 Basic stats: COMPLETE Column stats: NONE
                      Select Operator
                        expressions: _col0 (type: string), _col3 (type: string)
@@ -3316,6 +3478,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.a
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3324,6 +3499,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.b
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
                      File Output Operator
                        compressed: false
                        Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
@@ -3332,9 +3520,22 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.c
+                        Select Operator
+                          expressions: _col0 (type: string), _col1 (type: string)
+                          outputColumnNames: key, value
+                          Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE
+                          Group By Operator
+                            aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                            mode: hash
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            Reduce Output Operator
+                              sort order:
+                              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                              value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 5
+        Map 8
            Map Operator Tree:
                TableScan
                  alias: src
@@ -3354,7 +3555,7 @@ STAGE PLANS:
                        1 _col1 (type: string)
                      outputColumnNames: _col1
                      input vertices:
-                       1 Map 6
+                       1 Map 9
                      Statistics: Num rows: 577 Data size: 6053 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
@@ -3363,7 +3564,7 @@ STAGE PLANS:
                        Statistics: Num rows: 577
Data size: 6053 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 9 Map Operator Tree: TableScan alias: x @@ -3389,46 +3590,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 7 - Map Operator Tree: - TableScan - alias: y - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: llap - LLAP IO: no inputs - Map 8 - Map Operator Tree: - TableScan - alias: src1 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: value (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1025 Data size: 10815 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Reducer 10 + Reducer 13 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3444,7 +3606,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 1127 Data size: 11896 Basic stats: COMPLETE Column stats: NONE - Reducer 11 + Reducer 14 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -3467,6 +3629,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE @@ -3475,6 +3650,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE @@ -3483,6 +3671,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.c + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3506,6 +3707,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE @@ -3514,6 +3728,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE @@ -3522,12 +3749,70 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.c + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 3550 Data size: 37482 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data 
size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 12 + Vertex: Union 12 Union 2 Vertex: Union 2 Union 4 Vertex: Union 4 - Union 9 - Vertex: Union 9 Stage: Stage-4 Dependency Collection @@ -3545,6 +3830,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.b + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.c + Stage: Stage-1 Move Operator tables: @@ -3607,307 +3913,354 @@ Plan optimized by CBO. 
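The new compute_stats branches, stats reducers, and Column Stats Work stages that appear throughout these updated plans come from Hive gathering column statistics automatically during INSERT. A minimal sketch of the kind of statement that yields these extra stages — the hive.stats.column.autogather setting is an assumption not shown in this diff, and default.a is the table named in the plan below:

    -- Hedged sketch, not part of the golden output: with autogather enabled,
    -- EXPLAIN of a plain INSERT shows an added Select/Group By pipeline that
    -- feeds compute_stats(...) into a trailing Column Stats Work stage.
    SET hive.stats.column.autogather=true;
    EXPLAIN
    INSERT OVERWRITE TABLE a
    SELECT key, value FROM src;
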
Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 12 <- Union 13 (CONTAINS) -Map 19 <- Union 13 (CONTAINS) -Map 20 <- Union 15 (CONTAINS) -Map 23 <- Map 24 (BROADCAST_EDGE) -Map 25 <- Union 26 (CONTAINS) -Map 32 <- Union 26 (CONTAINS) -Map 33 <- Union 28 (CONTAINS) -Map 34 <- Union 30 (CONTAINS) -Map 9 <- Union 2 (CONTAINS) -Reducer 14 <- Union 13 (SIMPLE_EDGE), Union 15 (CONTAINS) -Reducer 16 <- Union 15 (SIMPLE_EDGE) -Reducer 17 <- Map 21 (SIMPLE_EDGE), Reducer 16 (SIMPLE_EDGE) -Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 27 <- Union 26 (SIMPLE_EDGE), Union 28 (CONTAINS) -Reducer 29 <- Union 28 (SIMPLE_EDGE), Union 30 (CONTAINS) -Reducer 3 <- Map 10 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 31 <- Map 23 (BROADCAST_EDGE), Union 30 (SIMPLE_EDGE), Union 7 (CONTAINS) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 12 <- Union 2 (CONTAINS) +Map 15 <- Union 16 (CONTAINS) +Map 22 <- Union 16 (CONTAINS) +Map 23 <- Union 18 (CONTAINS) +Map 26 <- Map 27 (BROADCAST_EDGE) +Map 28 <- Union 29 (CONTAINS) +Map 35 <- Union 29 (CONTAINS) +Map 36 <- Union 31 (CONTAINS) +Map 37 <- Union 33 (CONTAINS) +Reducer 10 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) +Reducer 17 <- Union 16 (SIMPLE_EDGE), Union 18 (CONTAINS) +Reducer 19 <- Union 18 (SIMPLE_EDGE) +Reducer 20 <- Map 24 (SIMPLE_EDGE), Reducer 19 (SIMPLE_EDGE) +Reducer 21 <- Map 25 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 3 <- Map 13 (BROADCAST_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) +Reducer 32 <- Union 31 (SIMPLE_EDGE), Union 33 (CONTAINS) +Reducer 34 <- Map 26 (BROADCAST_EDGE), Union 33 (SIMPLE_EDGE), Union 7 (CONTAINS) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 8 <- Union 7 (SIMPLE_EDGE) +Reducer 9 <- Reducer 8 (CUSTOM_SIMPLE_EDGE) -Stage-5 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.a"} - Stage-4 - Dependency Collection{} - Stage-3 - Reducer 8 llap - File Output Operator [FS_123] - table:{"name:":"default.a"} - Group By Operator [GBY_120] (rows=530 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 7 [SIMPLE_EDGE] - <-Reducer 31 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_114] (rows=484 width=10) - Output:["_col0","_col1"] - Map Join Operator [MAPJOIN_172] (rows=484 width=10) - Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] - <-Map 23 [BROADCAST_EDGE] llap - BROADCAST [RS_111] - PartitionCols:_col1 - Map Join Operator [MAPJOIN_171] (rows=27 width=7) - Conds:SEL_71._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 24 [BROADCAST_EDGE] llap - BROADCAST [RS_109] - PartitionCols:_col0 - Select Operator [SEL_74] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_162] (rows=25 width=7) - predicate:key is not null - TableScan [TS_72] (rows=25 width=7) - default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_71] (rows=25 width=7) +Stage-8 + Column Stats Work{} + Stage-5 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.a"} + Stage-4 + Dependency Collection{} + Stage-3 + Reducer 10 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] 
(rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 8 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_123] + table:{"name:":"default.a"} + Group By Operator [GBY_120] (rows=530 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 7 [SIMPLE_EDGE] + <-Reducer 34 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_114] (rows=484 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_161] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_69] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_107] (rows=440 width=10) - Output:["_col1"] - Group By Operator [GBY_106] (rows=440 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 30 [SIMPLE_EDGE] - <-Map 34 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_100] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_166] (rows=500 width=10) - predicate:value is not null - TableScan [TS_98] (rows=500 width=10) - Output:["key","value"] - <-Reducer 29 [CONTAINS] llap - Reduce Output Operator [RS_105] - PartitionCols:_col0, _col1 - Group By Operator [GBY_104] (rows=881 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_97] (rows=381 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_96] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 28 [SIMPLE_EDGE] - <-Map 33 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_90] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_165] (rows=500 width=10) - predicate:value is not null - TableScan [TS_88] (rows=500 width=10) - Output:["key","value"] - <-Reducer 27 [CONTAINS] llap - Reduce Output Operator [RS_95] - PartitionCols:_col0, _col1 - Group By Operator [GBY_94] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_87] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_86] (rows=262 width=10) + Map Join Operator [MAPJOIN_172] (rows=484 width=10) + Conds:RS_111._col1=SEL_107._col1(Inner),Output:["_col0","_col3"] + <-Map 26 [BROADCAST_EDGE] llap + BROADCAST [RS_111] + PartitionCols:_col1 + Map Join Operator [MAPJOIN_171] (rows=27 width=7) + Conds:SEL_71._col0=RS_109._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 27 [BROADCAST_EDGE] llap + BROADCAST [RS_109] + PartitionCols:_col0 + Select Operator [SEL_74] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_162] (rows=25 width=7) + predicate:key is not null + TableScan [TS_72] (rows=25 width=7) + default@src1,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_71] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_161] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_69] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select Operator [SEL_107] (rows=440 width=10) + Output:["_col1"] + Group By Operator [GBY_106] (rows=440 width=10) + 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 33 [SIMPLE_EDGE] + <-Map 37 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_100] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_166] (rows=500 width=10) + predicate:value is not null + TableScan [TS_98] (rows=500 width=10) + Output:["key","value"] + <-Reducer 32 [CONTAINS] llap + Reduce Output Operator [RS_105] + PartitionCols:_col0, _col1 + Group By Operator [GBY_104] (rows=881 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_97] (rows=381 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_96] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 31 [SIMPLE_EDGE] + <-Map 36 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_90] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_165] (rows=500 width=10) + predicate:value is not null + TableScan [TS_88] (rows=500 width=10) + Output:["key","value"] + <-Reducer 30 [CONTAINS] llap + Reduce Output Operator [RS_95] + PartitionCols:_col0, _col1 + Group By Operator [GBY_94] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_87] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_86] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 29 [SIMPLE_EDGE] + <-Map 28 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_77] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_163] (rows=25 width=7) + predicate:value is not null + TableScan [TS_75] (rows=25 width=7) + Output:["key","value"] + <-Map 35 [CONTAINS] llap + Reduce Output Operator [RS_85] + PartitionCols:_col0, _col1 + Group By Operator [GBY_84] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_80] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_164] (rows=500 width=10) + predicate:value is not null + TableScan [TS_78] (rows=500 width=10) + Output:["key","value"] + <-Reducer 6 [CONTAINS] llap + Reduce Output Operator [RS_119] + PartitionCols:_col0, _col1 + Group By Operator [GBY_118] (rows=1061 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Group By Operator [GBY_67] (rows=577 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 5 [SIMPLE_EDGE] + <-Reducer 21 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_61] (rows=605 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) + Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] + <-Map 25 [SIMPLE_EDGE] llap + SHUFFLE [RS_59] + PartitionCols:_col0 + Select Operator [SEL_54] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_160] (rows=500 width=10) + predicate:key is not null + TableScan [TS_52] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 20 [SIMPLE_EDGE] llap + SHUFFLE [RS_58] + PartitionCols:_col2 + 
Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) + Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] + <-Map 24 [SIMPLE_EDGE] llap + SHUFFLE [RS_56] + PartitionCols:_col1 + Select Operator [SEL_51] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_159] (rows=500 width=10) + predicate:(key is not null and value is not null) + TableScan [TS_49] (rows=500 width=10) + default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 19 [SIMPLE_EDGE] llap + SHUFFLE [RS_55] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=381 width=10) + Output:["_col1"] + Group By Operator [GBY_47] (rows=381 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 18 [SIMPLE_EDGE] + <-Map 23 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_41] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_158] (rows=500 width=10) + predicate:value is not null + TableScan [TS_39] (rows=500 width=10) + Output:["key","value"] + <-Reducer 17 [CONTAINS] llap + Reduce Output Operator [RS_46] + PartitionCols:_col0, _col1 + Group By Operator [GBY_45] (rows=762 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_38] (rows=262 width=10) + Output:["_col0","_col1"] + Group By Operator [GBY_37] (rows=262 width=10) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 16 [SIMPLE_EDGE] + <-Map 15 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_28] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=25 width=7) + predicate:value is not null + TableScan [TS_26] (rows=25 width=7) + Output:["key","value"] + <-Map 22 [CONTAINS] llap + Reduce Output Operator [RS_36] + PartitionCols:_col0, _col1 + Group By Operator [GBY_35] (rows=525 width=10) + Output:["_col0","_col1"],keys:_col1, _col0 + Select Operator [SEL_31] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_157] (rows=500 width=10) + predicate:value is not null + TableScan [TS_29] (rows=500 width=10) + Output:["key","value"] + <-Reducer 4 [CONTAINS] llap + Reduce Output Operator [RS_66] + PartitionCols:_col0, _col1 + Group By Operator [GBY_65] (rows=1155 width=10) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_25] (rows=550 width=10) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) + Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] + <-Map 14 [SIMPLE_EDGE] llap + SHUFFLE [RS_23] + PartitionCols:_col0 + Select Operator [SEL_18] (rows=500 width=10) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=500 width=10) + predicate:key is not null + TableScan [TS_16] (rows=500 width=10) + default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] llap + SHUFFLE [RS_22] + PartitionCols:_col2 + Map Join Operator [MAPJOIN_167] (rows=288 width=10) + Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] + <-Map 13 [BROADCAST_EDGE] llap + BROADCAST [RS_20] + PartitionCols:_col1 + Select Operator [SEL_15] (rows=25 width=7) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25 width=7) + predicate:(key is not null and value is not null) + TableScan [TS_13] (rows=25 width=7) + default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + <-Select 
Operator [SEL_12] (rows=262 width=10) + Output:["_col1"] + Group By Operator [GBY_11] (rows=262 width=10) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 26 [SIMPLE_EDGE] - <-Map 25 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_77] (rows=25 width=7) + Select Operator [SEL_2] (rows=25 width=7) Output:["_col0","_col1"] - Filter Operator [FIL_163] (rows=25 width=7) + Filter Operator [FIL_152] (rows=25 width=7) predicate:value is not null - TableScan [TS_75] (rows=25 width=7) + TableScan [TS_0] (rows=25 width=7) Output:["key","value"] - <-Map 32 [CONTAINS] llap - Reduce Output Operator [RS_85] + <-Map 12 [CONTAINS] llap + Reduce Output Operator [RS_10] PartitionCols:_col0, _col1 - Group By Operator [GBY_84] (rows=525 width=10) + Group By Operator [GBY_9] (rows=525 width=10) Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_80] (rows=500 width=10) + Select Operator [SEL_5] (rows=500 width=10) Output:["_col0","_col1"] - Filter Operator [FIL_164] (rows=500 width=10) + Filter Operator [FIL_153] (rows=500 width=10) predicate:value is not null - TableScan [TS_78] (rows=500 width=10) + TableScan [TS_3] (rows=500 width=10) Output:["key","value"] - <-Reducer 6 [CONTAINS] llap - Reduce Output Operator [RS_119] - PartitionCols:_col0, _col1 - Group By Operator [GBY_118] (rows=1061 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Group By Operator [GBY_67] (rows=577 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 5 [SIMPLE_EDGE] - <-Reducer 18 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_61] (rows=605 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_170] (rows=605 width=10) - Conds:RS_58._col2=RS_59._col0(Inner),Output:["_col2","_col5"] - <-Map 22 [SIMPLE_EDGE] llap - SHUFFLE [RS_59] - PartitionCols:_col0 - Select Operator [SEL_54] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_160] (rows=500 width=10) - predicate:key is not null - TableScan [TS_52] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 17 [SIMPLE_EDGE] llap - SHUFFLE [RS_58] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_169] (rows=550 width=10) - Conds:RS_55._col1=RS_56._col1(Inner),Output:["_col2"] - <-Map 21 [SIMPLE_EDGE] llap - SHUFFLE [RS_56] - PartitionCols:_col1 - Select Operator [SEL_51] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_159] (rows=500 width=10) - predicate:(key is not null and value is not null) - TableScan [TS_49] (rows=500 width=10) - default@src,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 16 [SIMPLE_EDGE] llap - SHUFFLE [RS_55] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=381 width=10) - Output:["_col1"] - Group By Operator [GBY_47] (rows=381 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 15 [SIMPLE_EDGE] - <-Map 20 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_41] (rows=500 width=10) - Output:["_col0","_col1"] - Filter 
Operator [FIL_158] (rows=500 width=10) - predicate:value is not null - TableScan [TS_39] (rows=500 width=10) - Output:["key","value"] - <-Reducer 14 [CONTAINS] llap - Reduce Output Operator [RS_46] - PartitionCols:_col0, _col1 - Group By Operator [GBY_45] (rows=762 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_38] (rows=262 width=10) - Output:["_col0","_col1"] - Group By Operator [GBY_37] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 13 [SIMPLE_EDGE] - <-Map 12 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_28] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=25 width=7) - predicate:value is not null - TableScan [TS_26] (rows=25 width=7) - Output:["key","value"] - <-Map 19 [CONTAINS] llap - Reduce Output Operator [RS_36] - PartitionCols:_col0, _col1 - Group By Operator [GBY_35] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_31] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=500 width=10) - predicate:value is not null - TableScan [TS_29] (rows=500 width=10) - Output:["key","value"] - <-Reducer 4 [CONTAINS] llap - Reduce Output Operator [RS_66] - PartitionCols:_col0, _col1 - Group By Operator [GBY_65] (rows=1155 width=10) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_25] (rows=550 width=10) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_168] (rows=550 width=10) - Conds:RS_22._col2=RS_23._col0(Inner),Output:["_col2","_col5"] - <-Map 11 [SIMPLE_EDGE] llap - SHUFFLE [RS_23] - PartitionCols:_col0 - Select Operator [SEL_18] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=500 width=10) - predicate:key is not null - TableScan [TS_16] (rows=500 width=10) - default@src,y,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] llap - SHUFFLE [RS_22] - PartitionCols:_col2 - Map Join Operator [MAPJOIN_167] (rows=288 width=10) - Conds:SEL_12._col1=RS_20._col1(Inner),Output:["_col2"] - <-Map 10 [BROADCAST_EDGE] llap - BROADCAST [RS_20] - PartitionCols:_col1 - Select Operator [SEL_15] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25 width=7) - predicate:(key is not null and value is not null) - TableScan [TS_13] (rows=25 width=7) - default@src1,x,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_12] (rows=262 width=10) - Output:["_col1"] - Group By Operator [GBY_11] (rows=262 width=10) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] llap - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_2] (rows=25 width=7) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=25 width=7) - predicate:value is not null - TableScan [TS_0] (rows=25 width=7) - Output:["key","value"] - <-Map 9 [CONTAINS] llap - Reduce Output Operator [RS_10] - PartitionCols:_col0, _col1 - Group By Operator [GBY_9] (rows=525 width=10) - Output:["_col0","_col1"],keys:_col1, _col0 - Select Operator [SEL_5] (rows=500 width=10) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=500 width=10) - predicate:value is not null - TableScan [TS_3] (rows=500 width=10) - Output:["key","value"] - File Output 
Operator [FS_125] - table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_120] - File Output Operator [FS_127] - table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_120] -Stage-6 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.b"} - Please refer to the previous Stage-4 -Stage-7 - Stats-Aggr Operator - Stage-2 - Move Operator - table:{"name:":"default.c"} - Please refer to the previous Stage-4 + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] + File Output Operator [FS_125] + table:{"name:":"default.b"} + Please refer to the previous Group By Operator [GBY_120] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] + File Output Operator [FS_127] + table:{"name:":"default.c"} + Please refer to the previous Group By Operator [GBY_120] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=530 width=10) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_120] + Reducer 11 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Reducer 8 [CUSTOM_SIMPLE_EDGE] + Reducer 9 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <- Please refer to the previous Reducer 8 [CUSTOM_SIMPLE_EDGE] + Stage-6 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.b"} + Please refer to the previous Stage-4 + Stage-7 + Stats-Aggr Operator + Stage-2 + Move Operator + table:{"name:":"default.c"} + Please refer to the previous Stage-4 +Stage-9 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 +Stage-10 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 PREHOOK: query: CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -3942,72 +4295,101 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
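In the multi-insert plan that follows, each destination table gets its own compute_stats branch and its own Column Stats Work stage, which is why the stage count grows with the number of INSERT targets. A heavily simplified sketch of the shape of such a statement (the real test query adds unions and aggregations; dest1 and dest2 are the tables named in the plan below):

    -- Hedged sketch: one statement, two targets; autogather adds one stats
    -- reducer and one Column Stats Work stage per target table.
    FROM src s1
    INSERT OVERWRITE TABLE dest1 SELECT s1.key, s1.value
    INSERT OVERWRITE TABLE dest2 SELECT s1.key, s1.value, s1.value;
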
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 5 llap - File Output Operator [FS_22] - table:{"name:":"default.dest1"} - Select Operator [SEL_20] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_19] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] llap - SHUFFLE [RS_18] - PartitionCols:_col0 - Group By Operator [GBY_17] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Group By Operator [GBY_14] (rows=1 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0, _col1 - Group By Operator [GBY_12] (rows=1 width=272) - Output:["_col0","_col1"],keys:_col0, _col1 - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_29] - table:{"name:":"default.dest2"} - Select Operator [SEL_27] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_26] (rows=1 width=464) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_14] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 6 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_22] + table:{"name:":"default.dest1"} + Select Operator [SEL_20] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_19] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_18] + PartitionCols:_col0 + Group By Operator [GBY_17] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Group By Operator [GBY_14] (rows=1 width=272) + 
Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0, _col1 + Group By Operator [GBY_12] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0, _col1 + Group By Operator [GBY_12] (rows=1 width=272) + Output:["_col0","_col1"],keys:_col0, _col1 + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + File Output Operator [FS_29] + table:{"name:":"default.dest2"} + Select Operator [SEL_27] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_26] (rows=1 width=464) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_14] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_27] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <- Please refer to the previous Reducer 4 [CUSTOM_SIMPLE_EDGE] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: EXPLAIN FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key PREHOOK: type: QUERY @@ -4115,91 +4497,119 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 llap - File Output Operator [FS_20] - table:{"name:":"default.dest1"} - Select Operator [SEL_18] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_17] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_8] - <-Map 7 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_12] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_11] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_12] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_16] - PartitionCols:_col0 - Group By Operator [GBY_15] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_23] - PartitionCols:_col0, _col1 - Group By Operator [GBY_22] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_6] - Reducer 5 llap - File Output Operator [FS_27] - table:{"name:":"default.dest2"} - Select Operator [SEL_25] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_24] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer 
to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_20] + table:{"name:":"default.dest1"} + Select Operator [SEL_18] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_17] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_8] + <-Map 9 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_12] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_11] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_12] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_16] + PartitionCols:_col0 + Group By Operator [GBY_15] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_23] + PartitionCols:_col0, _col1 + Group By Operator [GBY_22] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_6] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_18] + Reducer 7 llap + File Output 
Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_27] + table:{"name:":"default.dest2"} + Select Operator [SEL_25] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_24] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_25] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: explain FROM ( @@ -4224,74 +4634,102 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 llap - File Output Operator [FS_17] - table:{"name:":"default.dest1"} - Select Operator [SEL_15] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_14] (rows=1 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_8] (rows=500 width=10) - Output:["_col0","_col1"] - TableScan [TS_7] (rows=500 width=10) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Group By Operator [GBY_19] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_8] - <-Reducer 2 [CONTAINS] llap - Reduce Output Operator [RS_13] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) - Select Operator [SEL_6] (rows=1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_5] (rows=1 width=8) - Output:["_col0"],aggregations:["count(VALUE._col0)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] llap - PARTITION_ONLY_SHUFFLE [RS_4] - Group By Operator [GBY_3] (rows=1 width=8) - Output:["_col0"],aggregations:["count(1)"] - Select Operator [SEL_1] (rows=500 width=10) - TableScan [TS_0] (rows=500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Group By Operator 
[GBY_19] (rows=1 width=464) - Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) - Please refer to the previous Select Operator [SEL_6] - Reducer 5 llap - File Output Operator [FS_24] - table:{"name:":"default.dest2"} - Select Operator [SEL_22] (rows=1 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_21] (rows=1 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_17] + table:{"name:":"default.dest1"} + Select Operator [SEL_15] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_14] (rows=1 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_8] (rows=500 width=10) + Output:["_col0","_col1"] + TableScan [TS_7] (rows=500 width=10) + Output:["key","value"] + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Group By Operator [GBY_19] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_8] + <-Reducer 2 [CONTAINS] llap + Reduce Output Operator [RS_13] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) + Select Operator [SEL_6] (rows=1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_5] (rows=1 width=8) + Output:["_col0"],aggregations:["count(VALUE._col0)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] llap + PARTITION_ONLY_SHUFFLE [RS_4] + Group By Operator [GBY_3] (rows=1 width=8) + Output:["_col0"],aggregations:["count(1)"] + Select Operator [SEL_1] (rows=500 width=10) + TableScan [TS_0] (rows=500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Group By Operator [GBY_19] (rows=1 width=464) + Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) + Please refer to the previous Select Operator [SEL_6] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=984) + Output:["_col0","_col1"],aggregations:["compute_stats(key, 16)","compute_stats(value, 16)"] + Select Operator [SEL_1] (rows=1 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_15] + Reducer 7 llap + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=1440) + 
Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] llap + File Output Operator [FS_24] + table:{"name:":"default.dest2"} + Select Operator [SEL_22] (rows=1 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=1 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=1476) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(key, 16)","compute_stats(val1, 16)","compute_stats(val2, 16)"] + Select Operator [SEL_1] (rows=1 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_22] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out index d97223c..43b1880 100644 --- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out +++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out @@ -197,7 +197,7 @@ STAGE PLANS: partition values: year 2000 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -332,7 +332,7 @@ STAGE PLANS: partition values: year 2003 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true","zip":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt,zip @@ -853,7 +853,7 @@ STAGE PLANS: year 2001 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -899,7 +899,7 @@ STAGE PLANS: year 2002 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -945,7 +945,7 @@ STAGE PLANS: year 2003 zip 43201 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -991,7 +991,7 @@ STAGE PLANS: year 2000 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1083,7 +1083,7 @@ STAGE PLANS: year 2002 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1129,7 +1129,7 @@ 
STAGE PLANS: year 2003 zip 94086 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1175,7 +1175,7 @@ STAGE PLANS: year 2000 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1221,7 +1221,7 @@ STAGE PLANS: year 2001 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt @@ -1313,7 +1313,7 @@ STAGE PLANS: year 2003 zip 94087 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","locid":"true","state":"true"}} bucket_count -1 column.name.delimiter , columns state,locid,cnt diff --git a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out index c5cdb77..86309a7 100644 --- a/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/llap/filter_join_breaktask.q.out @@ -45,22 +45,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: f - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1450 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 auto parallelism: true Execution mode: llap @@ -76,7 +76,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -121,22 +121,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2289 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((value <> '') and key is not null) (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 1375 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 15 Data size: 2785 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -153,7 +153,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -198,22 +198,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: g - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (value <> '') (type: boolean) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 411 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 4575 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true Execution mode: llap @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -281,14 +281,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col3 - Position of Big Table: 0 - Statistics: Num rows: 27 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Position of Big Table: 1 + Statistics: Num rows: 25 Data size: 2325 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 27 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 2325 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col0 (type: int) auto parallelism: true @@ -304,17 +304,17 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col5 Position of Big Table: 0 - Statistics: Num rows: 29 Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 29 
Data size: 497 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 44 Data size: 4092 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/groupby2.q.out b/ql/src/test/results/clientpositive/llap/groupby2.q.out index 29b85d1..0cee285 100644 --- a/ql/src/test/results/clientpositive/llap/groupby2.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -27,6 +28,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -81,6 +84,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 205 Data size: 76260 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -98,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/groupby3.q.out b/ql/src/test/results/clientpositive/llap/groupby3.q.out index 3495de6..7f63fb9 100644 --- a/ql/src/test/results/clientpositive/llap/groupby3.q.out +++ 
b/ql/src/test/results/clientpositive/llap/groupby3.q.out @@ -37,6 +37,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -45,6 +46,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -95,6 +98,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4284 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4284 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4320 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -112,6 +143,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE 
TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out index 33d88ce..b3bc457 100644 --- a/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/llap/hybridgrace_hashjoin_1.q.out @@ -768,14 +768,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -784,15 +784,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -800,19 +800,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -822,10 +822,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -889,14 +889,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -905,16 +905,16 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -922,19 +922,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -944,10 +944,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1003,72 +1003,78 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- 
Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1124,73 +1130,78 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 
Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out index 1efb81b..35b6891 100644 --- a/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out +++ b/ql/src/test/results/clientpositive/llap/infer_bucket_sort_bucketed_table.q.out @@ -42,7 +42,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/llap/insert1.q.out b/ql/src/test/results/clientpositive/llap/insert1.q.out index aa09585..f49cc95 100644 --- a/ql/src/test/results/clientpositive/llap/insert1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert1.q.out @@ -33,11 +33,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -59,8 +63,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
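The hybridgrace_hashjoin_1 changes above are a knock-on effect rather than a new stats stage: with complete column statistics the estimated input sizes (Data size 45675 instead of 16674) and the join cardinality (1344 rows) change, and in the last two queries the broadcast Map Join is no longer chosen; the plan falls back to a shuffle Merge Join with a separate count reducer. The decision is driven by the small-table size check used for map-join conversion (a sketch of the relevant knobs; the size value is illustrative, not the test's actual setting):

  SET hive.auto.convert.join=true;
  SET hive.auto.convert.join.noconditionaltask.size=10000000;  -- max bytes a broadcast side may occupy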
compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -78,6 +110,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -87,11 +126,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -113,8 +156,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -132,6 +203,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: create database x PREHOOK: type: CREATEDATABASE PREHOOK: Output: database:x @@ -155,11 +233,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -181,8 +263,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -200,6 +310,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + PREHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) PREHOOK: type: QUERY POSTHOOK: query: explain insert into table default.INSERT1 select a.key, a.value from insert2 a WHERE (a.key=-1) @@ -209,11 +326,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -235,8 +356,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -254,6 +403,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr 
Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + PREHOOK: query: explain from insert2 insert into table insert1 select * where key < 10 @@ -269,6 +425,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -276,6 +434,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -297,6 +459,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -312,8 +487,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: x.insert1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -331,6 +549,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: x.insert1 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/insert_into1.q.out b/ql/src/test/results/clientpositive/llap/insert_into1.q.out index 5caefe8..24c225a 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -41,7 +43,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -67,6 +68,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -84,6 +113,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -141,6 +177,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + 
Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -148,6 +185,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -163,7 +201,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -189,6 +226,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 9500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -206,6 +271,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -263,6 +335,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -270,6 +343,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -285,7 +359,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -311,6 +384,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 950 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -328,6 +429,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -385,11 +493,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -409,8 +521,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -428,6 +568,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -447,11 +594,15 @@ 
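Every rewritten INSERT plan in these files grows the same tail: a Select Operator projecting the written columns, a map-side Group By Operator running compute_stats(col, 16) in hash mode, a shuffle to a stats reducer that merges the partial sketches (mergepartial, or partial1/final with a rand()-partitioned intermediate step in the groupby plans), and a trailing Column Stats Work stage that persists the result; the constant 16 is the number of bit vectors backing the NDV estimate. Conceptually, the autogathered branch behaves as if the following ran over the just-written rows (a sketch of the idea, not the literal rewrite the compiler performs):

  SELECT compute_stats(key, 16), compute_stats(value, 16)
  FROM default.insert_into1;

with the resulting statistics structs handed to the metastore instead of being returned to the client.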
STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -471,8 +622,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -490,6 +669,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/llap/insert_into2.q.out b/ql/src/test/results/clientpositive/llap/insert_into2.q.out index a42c651..9238082 100644 --- a/ql/src/test/results/clientpositive/llap/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/llap/insert_into2.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -30,6 +31,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -45,7 +47,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -71,6 +72,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +127,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -186,6 +230,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -193,6 +238,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -208,7 +254,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -234,6 +279,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 18000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group 
By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -253,6 +334,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 100 PREHOOK: type: QUERY @@ -318,6 +406,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -325,6 +414,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -340,7 +430,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -366,6 +455,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 9000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -385,6 +510,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/intersect_all.q.out b/ql/src/test/results/clientpositive/llap/intersect_all.q.out index 4dac8a2..bb42d05 100644 --- a/ql/src/test/results/clientpositive/llap/intersect_all.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_all.q.out @@ -1551,42 +1551,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1596,24 +1596,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: 
int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1623,24 +1623,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1652,24 +1652,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col1), count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out index 3698f03..de842a6 
100644 --- a/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_distinct.q.out @@ -1153,42 +1153,42 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: int) outputColumnNames: value - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: value (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: key - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: key (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1198,24 +1198,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 4 Execution mode: llap @@ -1225,17 +1225,17 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1247,24 +1247,24 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out index 6217785..2d060f4 100644 --- a/ql/src/test/results/clientpositive/llap/intersect_merge.q.out +++ b/ql/src/test/results/clientpositive/llap/intersect_merge.q.out @@ -55,22 +55,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: 
bigint) Execution mode: llap LLAP IO: no inputs @@ -78,22 +78,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -101,22 +101,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -124,22 +124,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -147,22 +147,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -174,18 +174,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -195,18 +195,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -216,18 +216,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -237,17 +237,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -260,18 +260,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -281,18 +281,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -327,22 +327,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -350,22 +350,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -373,22 +373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic 
stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -396,22 +396,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -423,18 +423,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -444,18 +444,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 
Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -465,17 +465,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 4) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -488,18 +488,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -509,18 +509,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 6 Data size: 18 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -556,22 +556,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value 
(type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -579,22 +579,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -602,22 +602,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -625,22 +625,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: 
COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -648,22 +648,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -675,18 +675,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -696,18 +696,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 
(type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -717,18 +717,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -738,17 +738,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -761,18 +761,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 
(type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -782,18 +782,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -829,22 +829,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -852,22 +852,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -875,22 +875,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -898,22 +898,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -921,22 +921,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -948,18 +948,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -969,18 +969,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -990,18 +990,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1011,17 +1011,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: 
boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1034,18 +1034,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1055,18 +1055,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1102,22 +1102,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1125,22 +1125,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1148,22 +1148,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1171,22 +1171,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1194,22 +1194,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1221,18 +1221,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 12 Execution mode: llap @@ -1242,18 +1242,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -1263,18 +1263,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1284,17 +1284,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 5) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1307,18 +1307,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1328,18 +1328,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 7 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1373,22 +1373,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1396,22 +1396,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1419,22 +1419,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, 
_col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1446,18 +1446,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 4 Execution mode: llap @@ -1467,17 +1467,17 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1490,18 +1490,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE 
+ Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1511,18 +1511,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Union 3 Vertex: Union 3 @@ -1556,22 +1556,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1579,22 +1579,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), 
_col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1602,22 +1602,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1629,18 +1629,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 4 Execution mode: llap @@ -1650,24 +1650,24 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 = 3) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int), col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1680,18 +1680,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 8 Execution mode: llap @@ -1701,18 +1701,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Union 3 Vertex: Union 3 @@ -1747,22 +1747,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 
Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1770,22 +1770,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 4 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1793,22 +1793,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1820,18 +1820,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) 
sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 2 Execution mode: llap @@ -1841,18 +1841,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Reducer 4 Execution mode: llap @@ -1862,24 +1862,24 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col3 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE UDTF Operator - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE function name: UDTFReplicateRows Select Operator expressions: col1 (type: int), col2 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1892,18 +1892,18 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reducer 8 Execution mode: llap @@ -1913,31 +1913,31 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col2 = 2) (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: int), _col1 (type: int) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(_col2), count(_col2) keys: _col0 (type: int), _col1 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint), _col3 (type: bigint) Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/llap/join1.q.out b/ql/src/test/results/clientpositive/llap/join1.q.out index d79a405..ab775a2 100644 --- a/ql/src/test/results/clientpositive/llap/join1.q.out +++ b/ql/src/test/results/clientpositive/llap/join1.q.out @@ -19,13 +19,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +49,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -90,6 +92,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + 
Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -107,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out index 5c2e3d4..da48abf 100644 --- a/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/llap/join32_lessSize.q.out @@ -31,14 +31,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -58,13 +60,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 3 => 25 + Estimated key counts: Map 4 => 25 keys: 0 _col0 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 58 Data size: 10266 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -131,7 +133,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -207,7 +209,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -332,6 +334,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 141 Data size: 37788 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -368,6 +417,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -503,15 +560,17 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -531,13 +590,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 44 Data size: 11616 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator @@ -602,7 +661,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: z @@ -678,7 +737,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [z] - Map 5 + Map 6 Map Operator Tree: TableScan alias: w @@ -753,7 +812,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] - Map 6 + Map 7 Map Operator Tree: TableScan alias: y @@ -879,7 +938,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -900,6 +959,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 248 Data size: 65968 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -913,7 +1019,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -936,6 +1042,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src w JOIN src1 x ON (x.value = w.value) @@ -1067,14 +1181,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- 
Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1154,7 +1270,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1171,13 +1287,13 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Estimated key counts: Map 4 => 25 + Estimated key counts: Map 5 => 25 keys: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator @@ -1246,7 +1362,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -1371,6 +1487,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -1407,6 +1570,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -1540,14 +1711,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), 
Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1627,7 +1800,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -1698,7 +1871,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] - Map 5 + Map 6 Map Operator Tree: TableScan alias: x @@ -1802,7 +1975,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1823,7 +1996,54 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 Execution mode: llap Needs Tagging: false Reduce Operator Tree: @@ -1862,7 +2082,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1885,6 +2105,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### 
+ Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res @@ -2030,14 +2258,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2059,7 +2289,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2079,7 +2309,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2093,7 +2323,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2136,6 +2366,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2153,6 +2411,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2286,14 +2551,16 @@ STAGE 
DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 3 <- Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Map 4 <- Map 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2315,7 +2582,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 184500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -2335,7 +2602,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 60 Data size: 10500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2349,7 +2616,7 @@ STAGE PLANS: value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -2392,6 +2659,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 140 Data size: 37240 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -2409,6 +2704,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res diff --git a/ql/src/test/results/clientpositive/llap/join46.q.out b/ql/src/test/results/clientpositive/llap/join46.q.out index 56f6862..511ca25 100644 --- a/ql/src/test/results/clientpositive/llap/join46.q.out +++ b/ql/src/test/results/clientpositive/llap/join46.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -102,10 +102,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,16 +169,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -186,19 +186,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key 
BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -215,10 +215,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -283,14 +283,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -298,17 +298,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -325,10 +325,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,16 +389,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -406,16 +406,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -429,10 +429,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -491,14 +491,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column 
stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -506,14 +506,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -530,10 +530,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -603,14 +603,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -618,14 +618,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -640,10 +640,10 @@ STAGE 
PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -716,14 +716,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -731,14 +731,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -753,10 +753,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -827,14 +827,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -842,14 +842,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -864,10 +864,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -934,16 +934,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -951,16 +951,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - 
Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -975,10 +975,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1044,14 +1044,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1059,14 +1059,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1081,14 +1081,14 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or (_col0 = _col3))} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4200 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1156,14 +1156,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1171,14 +1171,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1193,10 +1193,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1269,14 +1269,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1284,14 +1284,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1306,10 +1306,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1377,14 +1377,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1392,14 +1392,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1414,10 +1414,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num 
rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1486,16 +1486,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1503,16 +1503,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1527,10 +1527,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1596,14 +1596,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data 
size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1611,14 +1611,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1633,10 +1633,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1709,14 +1709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1724,14 +1724,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1746,10 +1746,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1820,14 +1820,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1835,14 +1835,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1857,10 +1857,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1929,16 +1929,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE 
Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1946,16 +1946,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1970,10 +1970,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2071,16 +2071,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2088,16 +2088,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2105,16 +2105,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2122,16 +2122,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -2146,10 +2146,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Reducer 3 Execution mode: llap @@ -2162,10 +2162,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2181,10 +2181,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out index 7fda074..e2f0845 100644 --- a/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out +++ b/ql/src/test/results/clientpositive/llap/join_emit_interval.q.out @@ -62,16 +62,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -79,16 +79,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -105,10 +105,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -168,14 +168,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -183,14 +183,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -207,10 +207,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/lineage2.q.out b/ql/src/test/results/clientpositive/llap/lineage2.q.out index 0967f20..2ae9a0a 100644 --- a/ql/src/test/results/clientpositive/llap/lineage2.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage2.q.out @@ -36,7 +36,7 @@ PREHOOK: query: insert into table dest1 select * from src2 PREHOOK: type: QUERY PREHOOK: Input: default@src2 PREHOOK: Output: default@dest1 -{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} 
+{"version":"1.0","engine":"tez","database":"default","hash":"ecc718a966d8887b18084a55dd96f0bc","queryText":"insert into table dest1 select * from src2","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"edgeType":"PROJECTION"},{"sources":[2],"targets":[0],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(default.src2.value2, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select key k, dest1.value from dest1 PREHOOK: type: QUERY PREHOOK: Input: default@dest1 @@ -467,20 +467,20 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e494b771d94800dc3430bf5d0810cd9f","queryText":"insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"efeaddd0d36105b1013b414627850dc2","queryText":"insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"src1.key is not null","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"src2.key2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1 PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e9450a56b3d103642e06bef0e4f0d482","queryText":"insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[5],"targets":[0,1,2,3],"expression":"src1.value is not null","edgeType":"PREDICATE"},{"sources":[5,7],"targets":[0,1,2,3],"expression":"(length(src1.value) = (length(src2.value2) + 1))","edgeType":"PREDICATE"},{"sources":[7],"targets":[0,1,2,3],"expression":"src2.value2 is not null","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: select * from src1 where length(key) > 2 PREHOOK: type: QUERY PREHOOK: Input: default@src1 @@ -530,7 +530,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src1 PREHOOK: Input: default@src2 PREHOOK: Output: default@dest2 -{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"76d84512204ddc576ad4d93f252e4358","queryText":"insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[4],"targets":[0,1,2,3],"expression":"((length(src1.key) > 3) and src1.key is not null)","edgeType":"PREDICATE"},{"sources":[4,6],"targets":[0,1,2,3],"expression":"(src1.key = src2.key2)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2,3],"expression":"((length(src2.key2) > 3) and src2.key2 is not null)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"compute_stats(default.src1.key, 16)","edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"expression":"compute_stats(default.src1.value, 16)","edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"expression":"compute_stats(default.src2.key2, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"expression":"compute_stats(default.src2.value2, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest2.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest2.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest2.key2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest2.value2"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src1.value"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src2.key2"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src2.value2"}]} PREHOOK: query: drop table if exists dest_l1 PREHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE @@ -552,7 +552,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest_l1 -{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"60b589744e2527dd235a6c8168d6a653","queryText":"INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(j.key)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"j.value","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(p1.key = t1.key)","edgeType":"PREDICATE"},{"sources":[4,2],"targets":[0,1],"expression":"(p2.key = t2.key)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(j.key), 16)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"compute_stats(j.value, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.key"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.value"},{"id":2,"vertexType":"COLUMN","vertexId":"default.src1.key"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src.value"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src.key"}]} PREHOOK: query: drop table if exists emp PREHOOK: type: DROPTABLE PREHOOK: query: drop table if exists dept @@ -593,7 +593,7 @@ PREHOOK: Input: default@dept PREHOOK: Input: default@emp PREHOOK: Input: default@project PREHOOK: Output: default@tgt -{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, 
p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"f59797e0422d2e51515063374dfac361","queryText":"INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"edgeType":"PROJECTION"},{"sources":[8,11],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id is not null and e.dept_id is not null)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"(e.emp_id = m.emp_id)","edgeType":"PREDICATE"},{"sources":[8],"targets":[0,1,2,3,4,5],"expression":"m.emp_id is not null","edgeType":"PREDICATE"},{"sources":[11,12],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = d.dept_id)","edgeType":"PREDICATE"},{"sources":[12],"targets":[0,1,2,3,4,5],"expression":"d.dept_id is not 
null","edgeType":"PREDICATE"},{"sources":[11,9],"targets":[0,1,2,3,4,5],"expression":"(e.dept_id = p.project_id)","edgeType":"PREDICATE"},{"sources":[9],"targets":[0,1,2,3,4,5],"expression":"p.project_id is not null","edgeType":"PREDICATE"},{"sources":[6],"targets":[0],"expression":"compute_stats(default.dept.dept_name, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"expression":"compute_stats(default.emp.name, 16)","edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"expression":"compute_stats(default.emp.emp_id, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[4],"expression":"compute_stats(default.project.project_id, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[5],"expression":"compute_stats(default.project.project_name, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.tgt.dept_name"},{"id":1,"vertexType":"COLUMN","vertexId":"default.tgt.name"},{"id":2,"vertexType":"COLUMN","vertexId":"default.tgt.emp_id"},{"id":3,"vertexType":"COLUMN","vertexId":"default.tgt.mgr_id"},{"id":4,"vertexType":"COLUMN","vertexId":"default.tgt.proj_id"},{"id":5,"vertexType":"COLUMN","vertexId":"default.tgt.proj_name"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dept.dept_name"},{"id":7,"vertexType":"COLUMN","vertexId":"default.emp.name"},{"id":8,"vertexType":"COLUMN","vertexId":"default.emp.emp_id"},{"id":9,"vertexType":"COLUMN","vertexId":"default.project.project_id"},{"id":10,"vertexType":"COLUMN","vertexId":"default.project.project_name"},{"id":11,"vertexType":"COLUMN","vertexId":"default.emp.dept_id"},{"id":12,"vertexType":"COLUMN","vertexId":"default.dept.dept_id"}]} PREHOOK: query: drop table if exists dest_l2 PREHOOK: type: DROPTABLE PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile @@ -603,7 +603,7 @@ PREHOOK: Output: default@dest_l2 PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l2 -{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e001334e3f8384806b0f25a7c303045f","queryText":"insert into dest_l2 values(0, 1, 100, 
10000)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"UDFToByte(values__tmp__table__1.tmp_values_col2)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"UDFToInteger(values__tmp__table__1.tmp_values_col3)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToLong(values__tmp__table__1.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(UDFToByte(values__tmp__table__1.tmp_values_col2), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(UDFToInteger(values__tmp__table__1.tmp_values_col3), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToLong(values__tmp__table__1.tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l2.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l2.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l2.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l2.c3"}]} PREHOOK: query: select * from ( select c1 + c2 x from dest_l2 union all @@ -623,7 +623,7 @@ PREHOOK: Output: default@dest_l3 PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15) PREHOOK: type: QUERY PREHOOK: Output: default@dest_l3 -{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"09df51ba6ba2d07f2304523ee505f094","queryText":"insert into dest_l3 values(0, \"s1\", \"s2\", 15)","edges":[{"sources":[],"targets":[0],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col1)","edgeType":"PROJECTION"},{"sources":[],"targets":[1,2],"edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"UDFToInteger(values__tmp__table__2.tmp_values_col4)","edgeType":"PROJECTION"},{"sources":[],"targets":[0],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col1), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[1],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col2, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"compute_stats(default.values__tmp__table__2.tmp_values_col3, 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[3],"expression":"compute_stats(UDFToInteger(values__tmp__table__2.tmp_values_col4), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l3.id"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l3.c1"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l3.c2"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_l3.c3"}]} 
PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id) from dest_l2 a where a.c2 != 10 @@ -702,4 +702,4 @@ from relations lateral view explode(ep1_ids) rel1 as ep1_id PREHOOK: type: QUERY PREHOOK: Input: default@relations PREHOOK: Output: default@rels_exploded -{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"e76d2efade744d1d5cf74fda064ba6c6","queryText":"insert into rels_exploded select identity, type,\n ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id\nfrom relations lateral view explode(ep1_ids) rel1 as ep1_id\n lateral view explode (ep2_ids) rel2 as ep2_id","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"CAST( rel1._col11 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"CAST( rel2._col12 AS CHAR(32)","edgeType":"PROJECTION"},{"sources":[8],"targets":[0],"expression":"compute_stats(default.relations.identity, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"expression":"compute_stats(default.relations.type, 16)","edgeType":"PROJECTION"},{"sources":[10],"targets":[2],"expression":"compute_stats(default.relations.ep1_src_type, 
16)","edgeType":"PROJECTION"},{"sources":[11],"targets":[3],"expression":"compute_stats(default.relations.ep1_type, 16)","edgeType":"PROJECTION"},{"sources":[12],"targets":[4],"expression":"compute_stats(default.relations.ep2_src_type, 16)","edgeType":"PROJECTION"},{"sources":[13],"targets":[5],"expression":"compute_stats(default.relations.ep2_type, 16)","edgeType":"PROJECTION"},{"sources":[14],"targets":[6],"expression":"compute_stats(CAST( rel1._col11 AS CHAR(32), 16)","edgeType":"PROJECTION"},{"sources":[15],"targets":[7],"expression":"compute_stats(CAST( rel2._col12 AS CHAR(32), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.rels_exploded.identity"},{"id":1,"vertexType":"COLUMN","vertexId":"default.rels_exploded.type"},{"id":2,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_src_type"},{"id":3,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_type"},{"id":4,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_src_type"},{"id":5,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_type"},{"id":6,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep1_id"},{"id":7,"vertexType":"COLUMN","vertexId":"default.rels_exploded.ep2_id"},{"id":8,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":9,"vertexType":"COLUMN","vertexId":"default.relations.type"},{"id":10,"vertexType":"COLUMN","vertexId":"default.relations.ep1_src_type"},{"id":11,"vertexType":"COLUMN","vertexId":"default.relations.ep1_type"},{"id":12,"vertexType":"COLUMN","vertexId":"default.relations.ep2_src_type"},{"id":13,"vertexType":"COLUMN","vertexId":"default.relations.ep2_type"},{"id":14,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"},{"id":15,"vertexType":"COLUMN","vertexId":"default.relations.ep2_ids"}]} diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out index 25f0439..f9d1bb1 100644 --- a/ql/src/test/results/clientpositive/llap/lineage3.q.out +++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out @@ -10,7 +10,7 @@ insert into table d1 select x + length(y) PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 -{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4c9b7b8d89403cef78668f15d393e542","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x + 
length(y)","edges":[{"sources":[1,2],"targets":[0],"expression":"(UDFToInteger(a.ctinyint) + length(b.cstring1))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[1,2],"targets":[0],"expression":"compute_stats((UDFToInteger(a.ctinyint) + length(b.cstring1)), 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: drop table if exists d2 PREHOOK: type: DROPTABLE PREHOOK: query: create table d2(b varchar(128)) @@ -25,7 +25,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@d1 PREHOOK: Output: default@d2 -{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"8703e4091ebd4c96afd3cac83e3a2957","queryText":"from (select a.ctinyint x, b.cstring1 y\nfrom alltypesorc a join alltypesorc b on a.cint = b.cbigint) t\ninsert into table d1 select x where y is null\ninsert into table d2 select y where x > 0","edges":[{"sources":[2],"targets":[0],"expression":"UDFToInteger(x)","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1],"expression":"a.cint is not null","edgeType":"PREDICATE"},{"sources":[3,4],"targets":[0,1],"expression":"(UDFToLong(a.cint) = b.cbigint)","edgeType":"PREDICATE"},{"sources":[4],"targets":[0,1],"expression":"b.cbigint is not null","edgeType":"PREDICATE"},{"sources":[5],"targets":[0],"expression":"t.y is null","edgeType":"PREDICATE"},{"sources":[5],"targets":[1],"expression":"CAST( y AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[1],"expression":"(t.x > 0)","edgeType":"PREDICATE"},{"sources":[2],"targets":[0],"expression":"compute_stats(UDFToInteger(x), 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.d1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.d2.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.ctinyint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: drop table if exists t PREHOOK: type: DROPTABLE PREHOOK: query: create table t as @@ -51,7 +51,7 @@ where cint is not null and cint < 0 order by cint, cs limit 5 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=today -{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[2],"targets":[0,1],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"2b5891d094ff74e23ec6acf5b4990f45","queryText":"insert into table dest_l1 partition (ds='today')\nselect cint, cast(cstring1 as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cint < 0 order by cint, cs limit 5","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( alltypesorc.cstring1 AS varchar(128))","edgeType":"PROJECTION"},{"sources":[3],"targets":[0,1,2],"expression":"(alltypesorc.cint < 0)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(default.alltypesorc.cint, 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( alltypesorc.cstring1 AS varchar(128)), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'today'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"}]} PREHOOK: query: insert into table dest_l1 partition (ds='tomorrow') select min(cint), cast(min(cstring1) as varchar(128)) as cs from alltypesorc @@ -61,7 +61,7 @@ having min(cbigint) > 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc PREHOOK: Output: default@dest_l1@ds=tomorrow -{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 
10","edges":[{"sources":[2],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[4,2],"targets":[0,1],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[5],"targets":[0,1],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"4ad6338a8abfe3fe0342198fcbd1f11d","queryText":"insert into table dest_l1 partition (ds='tomorrow')\nselect min(cint), cast(min(cstring1) as varchar(128)) as cs\nfrom alltypesorc\nwhere cint is not null and cboolean1 = true\ngroup by csmallint\nhaving min(cbigint) > 10","edges":[{"sources":[3],"targets":[0],"expression":"min(default.alltypesorc.cint)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"CAST( min(default.alltypesorc.cstring1) AS varchar(128))","edgeType":"PROJECTION"},{"sources":[5,3],"targets":[0,1,2],"expression":"(alltypesorc.cboolean1 and alltypesorc.cint is not null)","edgeType":"PREDICATE"},{"sources":[6],"targets":[0,1,2],"expression":"(min(default.alltypesorc.cbigint) > 10)","edgeType":"PREDICATE"},{"sources":[3],"targets":[0],"expression":"compute_stats(min(default.alltypesorc.cint), 16)","edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"expression":"compute_stats(CAST( min(default.alltypesorc.cstring1) AS varchar(128)), 16)","edgeType":"PROJECTION"},{"sources":[],"targets":[2],"expression":"'tomorrow'","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_l1.a"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_l1.b"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_l1.ds"},{"id":3,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cint"},{"id":4,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cstring1"},{"id":5,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cboolean1"},{"id":6,"vertexType":"COLUMN","vertexId":"default.alltypesorc.cbigint"}]} PREHOOK: query: select cint, rank() over(order by cint) from alltypesorc where cint > 10 and cint < 10000 limit 10 PREHOOK: type: QUERY @@ -348,21 +348,23 @@ PREHOOK: query: insert into dest_dp1 partition (year) select first, word, year f PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp1 -{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from 
src_dp","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"b2d38401a3281e74a003d9650df97060","queryText":"insert into dest_dp1 partition (year) select first, word, year from src_dp","edges":[{"sources":[6],"targets":[0],"edgeType":"PROJECTION"},{"sources":[7],"targets":[1],"edgeType":"PROJECTION"},{"sources":[8],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[6],"targets":[4],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[7],"targets":[5],"expression":"compute_stats(default.src_dp.word, 16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"first"},{"id":5,"vertexType":"COLUMN","vertexId":"word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2 -{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"237302d8ffd62b5b71d9544b22de7770","queryText":"insert into dest_dp2 partition (y, m) select first, word, year, month from src_dp","edges":[{"sources":[8],"targets":[0],"edgeType":"PROJECTION"},{"sources":[9],"targets":[1],"edgeType":"PROJECTION"},{"sources":[10],"targets":[2,3],"edgeType":"PROJECTION"},{"sources":[11],"targets":[4,5],"edgeType":"PROJECTION"},{"sources":[8],"targets":[6],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[9],"targets":[7],"expression":"compute_stats(default.src_dp.word, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":3,"vertexType":"COLUMN","vertexId":"year"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":5,"vertexType":"COLUMN","vertexId":"month"},{"id":6,"vertexType":"COLUMN","vertexId":"first"},{"id":7,"vertexType":"COLUMN","vertexId":"word"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":9,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":10,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.month"}]} PREHOOK: query: insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp2@y=0 +Result schema has 3 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"63e990b47e7ab4eb6f2ea09dfb7453ff","queryText":"insert into dest_dp2 partition (y=0, m) select first, word, month from src_dp where year=0","edges":[{"sources":[3],"targets":[0],"edgeType":"PROJECTION"},{"sources":[4],"targets":[1],"edgeType":"PROJECTION"},{"sources":[5],"targets":[2],"edgeType":"PROJECTION"},{"sources":[6],"targets":[0,1,2],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0 PREHOOK: type: QUERY PREHOOK: Input: default@src_dp PREHOOK: Output: default@dest_dp3@y=0 +Result schema has 4 fields, but we don't get as many dependencies {"version":"1.0","engine":"tez","database":"default","hash":"6bf71a9d02c0612c63b6f40b15c1e8b3","queryText":"insert into dest_dp3 partition (y=0, m, d) select first, word, month m, day d from src_dp where year=0","edges":[{"sources":[4],"targets":[0],"edgeType":"PROJECTION"},{"sources":[5],"targets":[1],"edgeType":"PROJECTION"},{"sources":[6],"targets":[2],"edgeType":"PROJECTION"},{"sources":[7],"targets":[3],"edgeType":"PROJECTION"},{"sources":[8],"targets":[0,1,2,3],"expression":"(src_dp.year = 0)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":4,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":5,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":7,"vertexType":"COLUMN","vertexId":"default.src_dp.day"},{"id":8,"vertexType":"COLUMN","vertexId":"default.src_dp.year"}]} PREHOOK: query: drop table if exists src_dp1 PREHOOK: type: DROPTABLE @@ -385,4 +387,4 @@ PREHOOK: Output: default@dest_dp1@year=0 PREHOOK: Output: default@dest_dp2 PREHOOK: Output: 
default@dest_dp2@y=1 PREHOOK: Output: default@dest_dp3@y=2 -{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} +{"version":"1.0","engine":"tez","database":"default","hash":"44f16edbf35cfeaf3d4f7b0113a69b74","queryText":"from src_dp, src_dp1\ninsert into dest_dp1 partition (year) select first, word, year\ninsert into dest_dp2 partition (y, m) select first, word, year, month\ninsert into dest_dp3 partition (y=2, m, d) select first, word, month m, day d where year=2\ninsert into dest_dp2 partition (y=1, m) select f, w, m\ninsert into dest_dp1 partition (year=0) select f, w","edges":[{"sources":[11],"targets":[0,1,2],"edgeType":"PROJECTION"},{"sources":[12],"targets":[3,4,5],"edgeType":"PROJECTION"},{"sources":[13],"targets":[6,7],"edgeType":"PROJECTION"},{"sources":[14],"targets":[8,9],"edgeType":"PROJECTION"},{"sources":[15],"targets":[1,0],"edgeType":"PROJECTION"},{"sources":[16],"targets":[4,3],"edgeType":"PROJECTION"},{"sources":[17],"targets":[8],"edgeType":"PROJECTION"},{"sources":[18],"targets":[10],"edgeType":"PROJECTION"},{"sources":[13],"targets":[2,5,9,10],"expression":"(subq.col7 = 2)","edgeType":"PREDICATE"},{"sources":[11],"targets":[0],"expression":"compute_stats(default.src_dp.first, 16)","edgeType":"PROJECTION"},{"sources":[12],"targets":[3],"expression":"compute_stats(default.src_dp.word, 
16)","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"default.dest_dp1.first"},{"id":1,"vertexType":"COLUMN","vertexId":"default.dest_dp2.first"},{"id":2,"vertexType":"COLUMN","vertexId":"default.dest_dp3.first"},{"id":3,"vertexType":"COLUMN","vertexId":"default.dest_dp1.word"},{"id":4,"vertexType":"COLUMN","vertexId":"default.dest_dp2.word"},{"id":5,"vertexType":"COLUMN","vertexId":"default.dest_dp3.word"},{"id":6,"vertexType":"COLUMN","vertexId":"default.dest_dp1.year"},{"id":7,"vertexType":"COLUMN","vertexId":"default.dest_dp2.y"},{"id":8,"vertexType":"COLUMN","vertexId":"default.dest_dp2.m"},{"id":9,"vertexType":"COLUMN","vertexId":"default.dest_dp3.m"},{"id":10,"vertexType":"COLUMN","vertexId":"default.dest_dp3.d"},{"id":11,"vertexType":"COLUMN","vertexId":"default.src_dp.first"},{"id":12,"vertexType":"COLUMN","vertexId":"default.src_dp.word"},{"id":13,"vertexType":"COLUMN","vertexId":"default.src_dp.year"},{"id":14,"vertexType":"COLUMN","vertexId":"default.src_dp.month"},{"id":15,"vertexType":"COLUMN","vertexId":"default.src_dp1.f"},{"id":16,"vertexType":"COLUMN","vertexId":"default.src_dp1.w"},{"id":17,"vertexType":"COLUMN","vertexId":"default.src_dp1.m"},{"id":18,"vertexType":"COLUMN","vertexId":"default.src_dp.day"}]} diff --git a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out index cdb688b..ffdcaa4 100644 --- a/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out +++ b/ql/src/test/results/clientpositive/llap/list_bucket_dml_10.q.out @@ -27,11 +27,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -73,6 +77,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 500 Data size: 179000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -126,6 +149,42 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Reducer 2 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), 
_col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1140 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -162,6 +221,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.list_bucketing_static_part + Is Table Level Stats: false + PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11') select key, value from src PREHOOK: type: QUERY @@ -204,7 +271,7 @@ Database: default Table: list_bucketing_static_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index d35501e..055f6f1 100644 --- a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1630,7 +1630,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: oft - Statistics: Num rows: 12288 Data size: 5280746 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1640,14 +1640,14 @@ STAGE PLANS: outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 - Statistics: Num rows: 960 Data size: 497280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 960 Data size: 3840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 960 Data size: 3840 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 960 Data size: 195840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out 
b/ql/src/test/results/clientpositive/llap/llap_stats.q.out index f81ad50..4404ad0 100644 --- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out @@ -108,11 +108,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: llap_stats - Statistics: Num rows: 10 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint - Statistics: Num rows: 10 Data size: 116 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: compute_stats(ctinyint, 16), compute_stats(csmallint, 16) keys: cint (type: int) diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out index 6ad9af8..91eb14d 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part1.q.out @@ -57,6 +57,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -64,6 +66,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -85,6 +91,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1352 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 363636 Basic stats: COMPLETE Column stats: COMPLETE @@ -100,8 +122,64 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 303696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 1 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1262 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1328 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1238 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -122,6 +200,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out index ce5517a..5eadad5 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part2.q.out @@ -39,6 +39,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -46,6 +47,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 
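The plan diffs in this file and the neighboring q.out files all share one shape: each INSERT pipeline gains a Select Operator feeding compute_stats(col, 16) hash aggregations, a mergepartial reducer keyed on the partition columns, and a trailing Column Stats Work stage, after which COLUMN_STATS_ACCURATE carries a COLUMN_STATS map alongside BASIC_STATS. A minimal HiveQL sketch of how one of these golden plans could be regenerated follows; the hive.stats.column.autogather property name and the hr='11' partition value are assumptions not taken from this diff, while the query and table come from the load_dyn_part2 hunks around this point:

    -- sketch only: assumes the suite's standard srcpart data and that
    -- hive.stats.column.autogather is the switch behind these plan changes
    set hive.stats.column.autogather=true;
    explain
    insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr)
    select key, value, hr from srcpart where ds is not null and hr is not null;
    -- the EXPLAIN output should show the extra stats reducer and the final
    -- "Column Stats Work" stage added in the hunk below; after actually
    -- running the insert, the new column stats surface in the partition
    -- metadata (hr='11' is an assumed partition value):
    describe formatted nzhang_part_bucket partition (ds='2010-03-23', hr='11');
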
@@ -79,6 +81,42 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 912000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2 Data size: 2524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 2476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -99,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out index b34975f..52b4c87 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part3.q.out @@ -45,11 +45,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -68,8 +72,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 
1092000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5408 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 4 Data size: 5408 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 4 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +130,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out index 9c4e8a8..6370c63 100644 --- a/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out +++ b/ql/src/test/results/clientpositive/llap/load_dyn_part5.q.out @@ -32,11 +32,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -55,8 +59,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 124762 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 214 
Data size: 124762 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 214 Data size: 122194 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -76,6 +116,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part5 + PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out index cf1bebb..b61987b 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin3.q.out @@ -105,11 +105,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -119,14 +119,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), _col2 (type: varchar(100)), _col3 (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,16 +137,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), age (type: varchar(100)), age (type: varchar(100)) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: varchar(100)), _col2 (type: varchar(100)) Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out index 73960ce..cc2cc51 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -76,10 +76,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -90,16 +90,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -162,11 +162,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -179,10 +179,10 @@ 
STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,19 +193,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -269,11 +269,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -286,10 +286,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -300,17 +300,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key BETWEEN 100 AND 102 (type: boolean) - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: 
string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -370,16 +370,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -387,11 +387,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -401,10 +401,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,11 +465,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -482,10 +482,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: 
COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -496,14 +496,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -572,11 +572,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -587,10 +587,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,14 +601,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -680,11 +680,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -695,10 +695,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -709,14 +709,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -786,11 +786,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -801,10 +801,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,14 +815,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 
Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -888,11 +888,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -903,10 +903,10 @@ STAGE PLANS: input vertices: 1 Map 2 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -917,16 +917,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -993,14 +993,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1008,11 +1008,11 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1023,10 +1023,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1101,14 +1101,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1116,11 +1116,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1131,10 +1131,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1204,14 +1204,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic 
stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1219,11 +1219,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1234,10 +1234,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1308,16 +1308,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1325,11 +1325,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1340,10 +1340,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 
BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1411,14 +1411,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1426,14 +1426,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1448,10 +1448,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1524,14 +1524,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort 
order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1539,14 +1539,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1561,10 +1561,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1635,14 +1635,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1650,14 +1650,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP 
IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1785,10 +1785,10 @@ STAGE PLANS: 1 _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1886,16 +1886,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1903,11 +1903,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -1918,10 +1918,10 @@ STAGE PLANS: input vertices: 0 Map 1 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1929,11 +1929,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -1944,10 +1944,10 @@ STAGE PLANS: input vertices: 1 Map 5 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1955,16 +1955,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), 
col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1979,10 +1979,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 64 Data size: 24448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out index 38b8f92..eb638e9 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_decimal.q.out @@ -95,14 +95,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -112,11 +112,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(4,2)) sort order: + - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(4,0)) Execution mode: llap LLAP IO: all inputs @@ -124,19 +124,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: 
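
The hunks above and the mapjoin_decimal.q.out hunks that follow are pure re-annotation: every operator flips from "Column stats: NONE" to "Column stats: COMPLETE", which changes the reported Data size and, at join operators, the Num rows estimate. That annotation appears once per-column statistics exist in the metastore and the planner is allowed to fetch them. A minimal sketch of reaching the same state by hand, assuming the t1/dec table shown in these plans (the SET name is the standard Hive knob, not something introduced by this patch):

  -- let the annotator consume column stats when decorating the plan
  SET hive.stats.fetch.column.stats=true;
  -- populate per-column stats (min/max, NDV, null counts) in the metastore
  ANALYZE TABLE t1 COMPUTE STATISTICS FOR COLUMNS;
  -- operators now report "Column stats: COMPLETE"
  EXPLAIN SELECT dec FROM t1 WHERE dec IS NOT NULL;
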
COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(6,2)) sort order: + Map-reduce partition columns: _col0 (type: decimal(6,2)) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -145,10 +145,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: decimal(4,2)), VALUE._col0 (type: decimal(4,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out index 81a8546..ab3db1a 100644 --- a/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out +++ b/ql/src/test/results/clientpositive/llap/mapjoin_emit_interval.q.out @@ -62,11 +62,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -79,10 +79,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -93,16 +93,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition 
columns: _col1 (type: int) - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -161,11 +161,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -178,10 +178,10 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -192,14 +192,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs diff --git a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out index 37f92d9..881bb79 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce1.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -34,6 +35,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: 
compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -90,6 +120,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out index 71bbb7e..53fa241 100644 --- a/ql/src/test/results/clientpositive/llap/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/llap/mapreduce2.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -32,6 +33,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -70,6 +72,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE 
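
mapreduce1.q.out and mapreduce2.q.out grow the same tail after the File Output Operator: a Select re-projecting the written columns, a map-side Group By running compute_stats(col, 16) in hash mode, and a new Reducer 3 merging the partial sketches (mergepartial) before the added Stage-4 Column Stats Work persists the result. The constant 16 is the number of bit vectors backing the NDV estimator. These stages wrap an ordinary UDAF, so the intermediate can be sketched directly; a hedged illustration against the src table these tests read (the planner injects this automatically, it is not typed in the test):

  -- one stats struct per column: counts, min/max or lengths, null count, NDV bit vectors
  SELECT compute_stats(key, 16), compute_stats(value, 16) FROM src;
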
Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -87,6 +117,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/llap/merge1.q.out b/ql/src/test/results/clientpositive/llap/merge1.q.out index 4bcb728..76e31dd 100644 --- a/ql/src/test/results/clientpositive/llap/merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/merge1.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +119,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -473,31 +510,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator 
Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -515,6 +584,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -539,31 +615,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -581,6 +689,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + PREHOOK: query: insert overwrite table dest1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git a/ql/src/test/results/clientpositive/llap/merge2.q.out b/ql/src/test/results/clientpositive/llap/merge2.q.out index aa0567b..144acc2 100644 --- a/ql/src/test/results/clientpositive/llap/merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/merge2.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -26,6 +27,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 205 Data size: 1640 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +119,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -473,31 +510,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -515,6 +584,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src @@ -539,31 +615,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: 
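
merge1.q.out and merge2.q.out pick up the identical tail even for the trivial single-column copy out of test_src: one compute_stats(key, 16) sketch, a Reducer 2 mergepartial, and a trailing Stage-4 Column Stats Work that depends on Stage-3's Stats-Aggr, so basic stats land before the column stats are published. After such a plan runs, the stored stats are visible from the CLI; a small hedged check against the tables these hunks write (dest1 in merge1, test1 in merge2):

  -- column-level view: min/max, NDV, null count, avg/max length for strings
  DESCRIBE FORMATTED default.dest1 key;
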
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: test_src - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -581,6 +689,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + PREHOOK: query: insert overwrite table test1 select key from test_src PREHOOK: type: QUERY PREHOOK: Input: default@test_src diff --git a/ql/src/test/results/clientpositive/llap/mergejoin.q.out b/ql/src/test/results/clientpositive/llap/mergejoin.q.out index 45e58e0..61fd14d 100644 --- a/ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -293,19 +293,19 @@ STAGE PLANS: TableScan alias: a filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -313,19 +313,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -337,15 +337,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -354,10 +354,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1382,32 +1382,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1419,15 +1419,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1436,10 +1436,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1493,32 +1493,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1530,15 +1530,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1547,10 +1547,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1604,32 +1604,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1641,15 
+1641,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 5936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1658,10 +1658,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1704,9 +1704,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1714,41 +1716,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE 
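
In the mergejoin.q.out hunks the table-scan sizes fall sharply even though the row counts are unchanged, because a COMPLETE annotation charges only the projected column: for alias a, 242 rows x 4 bytes per int = 968 (down from the 24684 the raw-data heuristic produced), and for alias b, 500 x 4 = 2000. Join cardinalities likewise switch from the old size-based heuristic (550) to NDV-based estimates (617, 742, and 137 after the semijoin rewrite below).
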
Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -1784,6 +1766,26 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1791,15 +1793,28 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE 
Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=54) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1807,17 +1822,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 61710 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -1826,14 +1841,26 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=54) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -1892,19 +1919,19 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: 
string) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -1912,19 +1939,19 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1936,15 +1963,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 565 Data size: 4520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1953,10 +1980,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2015,19 +2042,19 @@ STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2035,19 +2062,19 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 7 @@ -2055,19 +2082,19 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 8 @@ -2075,19 +2102,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 
Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2100,12 +2127,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 266 Data size: 27152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2115,15 +2142,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 558 Data size: 57019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -2132,10 +2159,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2171,19 +2198,19 @@ STAGE PLANS: TableScan alias: a filterExpr: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 4 @@ -2191,19 +2218,19 @@ STAGE PLANS: TableScan alias: b filterExpr: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2215,15 +2242,15 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 565 Data size: 4520 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -2232,10 +2259,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2276,9 +2303,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Reducer 7 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Map 8 <- Reducer 5 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2286,41 +2315,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + filterExpr: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: 
(key is not null and value is not null and (value BETWEEN DynamicValue(RS_13_c_value_min) AND DynamicValue(RS_13_c_value_max) and in_bloom_filter(value, DynamicValue(RS_13_c_value_bloom_filter)))) (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + predicate: (key is not null and value is not null and (value BETWEEN DynamicValue(RS_10_c_value_min) AND DynamicValue(RS_10_c_value_max) and in_bloom_filter(value, DynamicValue(RS_10_c_value_bloom_filter)))) (type: boolean) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 - Map Operator Tree: - TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2356,6 +2365,26 @@ STAGE PLANS: value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: no inputs + Map 8 + Map Operator Tree: + TableScan + alias: b + filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (key is not null and (key BETWEEN DynamicValue(RS_12_a_key_min) AND DynamicValue(RS_12_a_key_max) and in_bloom_filter(key, DynamicValue(RS_12_a_key_bloom_filter)))) (type: boolean) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -2363,15 +2392,28 @@ STAGE PLANS: condition map: Inner Join 0 
to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 54 Data size: 216 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=54) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2379,17 +2421,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 61710 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 137 Data size: 1096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -2398,14 +2440,26 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=54) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -2474,19 +2528,19 @@ 
STAGE PLANS: TableScan alias: s1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 6 @@ -2494,19 +2548,19 @@ STAGE PLANS: TableScan alias: s3 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 7 @@ -2514,19 +2568,19 @@ STAGE PLANS: TableScan alias: s2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 8 @@ -2534,19 +2588,19 @@ STAGE PLANS: TableScan alias: b filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column 
stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2559,12 +2613,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 266 Data size: 27152 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 508 Data size: 51836 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 742 Data size: 2968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -2574,15 +2628,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 558 Data size: 57019 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 5 Execution mode: vectorized, llap @@ -2591,10 +2645,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2646,18 +2700,18 @@ STAGE PLANS: TableScan alias: t1 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 242 Data size: 24684 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map 5 @@ -2665,18 +2719,18 @@ STAGE PLANS: TableScan alias: t2 filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2685,12 +2739,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 24684 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2700,15 +2754,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 56100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: vectorized, llap @@ -2717,10 +2771,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2731,12 +2785,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out index 81483f1..f91f571 100644 --- a/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/llap/metadata_only_queries.q.out @@ -183,56 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor 
Tree: ListSink @@ -243,56 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -303,60 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - 
aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -367,60 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - 
value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -646,19 +462,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9489 Data size: 379560 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ts (type: timestamp) outputColumnNames: ts - Statistics: Num rows: 9489 Data size: 1054697 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9489 Data size: 379560 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(ts) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -669,10 +485,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out index 73ccdf5..435e6ca 100644 --- 
a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out +++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out @@ -21,6 +21,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl2(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -44,6 +45,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl3(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -67,6 +69,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table smallTbl4(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -90,6 +93,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: create table bigTbl(key string, value string) PREHOOK: type: CREATETABLE PREHOOK: Output: database:default @@ -157,6 +161,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: explain select count(*) FROM (select bigTbl.key as key, bigTbl.value as value1, @@ -192,14 +197,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -209,11 +214,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -222,15 +227,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: 
bigint) Execution mode: llap LLAP IO: no inputs @@ -238,38 +243,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -279,10 +284,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -355,14 +360,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column 
stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -372,11 +377,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -385,15 +390,15 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 1760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -401,38 +406,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -442,10 +447,10 @@ STAGE PLANS: aggregations: 
count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -520,14 +525,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5000 Data size: 53120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 890000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -537,11 +542,11 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 5500 Data size: 58432 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 22113 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -551,18 +556,18 @@ STAGE PLANS: outputColumnNames: _col1 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 18700 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 6050 Data size: 64275 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -570,38 +575,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -613,14 +618,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3025 Data size: 32137 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 7 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -742,6 +747,7 @@ RUN: Stage-1:MAPRED RUN: Stage-2:DEPENDENCY_COLLECTION RUN: Stage-0:MOVE RUN: Stage-3:STATS +RUN: Stage-4:COLUMNSTATS PREHOOK: query: EXPLAIN SELECT SUM(HASH(join3.key1)), SUM(HASH(join3.key2)), @@ -825,14 +831,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -842,11 +848,11 @@ 
STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -856,11 +862,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -870,11 +876,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -884,19 +890,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE 
value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -904,38 +910,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -943,38 +949,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -984,10 +990,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1160,14 +1166,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1177,11 +1183,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1191,11 +1197,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data 
size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1205,11 +1211,11 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 input vertices: 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1219,19 +1225,19 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 input vertices: 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -1239,38 +1245,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1278,38 +1284,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE 
Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1319,10 +1325,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1487,177 +1493,208 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE) + Reducer 3 <- Map 8 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Map 9 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 10 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - input vertices: - 1 Map 3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - input vertices: - 1 Map 4 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - input vertices: - 1 Map 5 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: 
COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - input vertices: - 1 Map 6 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 10 + Map Operator Tree: + TableScan + alias: smalltbl4 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 7 Map Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 8 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 9 Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Execution mode: llap - LLAP IO: no inputs - Map 6 - Map Operator Tree: - TableScan - alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 
(type: string), _col3 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col3 (type: string) + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: string) + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 
Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1833,19 +1870,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: bigtbl - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key1 is not null and value is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key1 (type: string), key2 (type: string), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5000 Data size: 72180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5000 Data size: 1325000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1853,57 +1890,57 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl4 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map 
Operator Tree: TableScan alias: smalltbl1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: smalltbl2 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: value is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1740 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1911,19 +1948,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: smalltbl3 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 850 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1936,16 +1973,16 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 85050 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: 
string), _col3 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 5500 Data size: 79398 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 243 Data size: 107163 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reducer 3 Execution mode: llap @@ -1957,16 +1994,16 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 117700 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 6050 Data size: 87337 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 95700 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string) Reducer 4 Execution mode: llap @@ -1978,16 +2015,16 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 136400 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 6655 Data size: 96070 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 220 Data size: 114400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string) Reducer 5 Execution mode: llap @@ -1999,19 +2036,19 @@ STAGE PLANS: 0 _col2 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), 
hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                  Statistics: Num rows: 7320 Data size: 105677 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 314 Data size: 221370 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint)
         Reducer 6 
             Execution mode: llap
@@ -2020,10 +2057,10 @@ STAGE PLANS:
                 aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3), sum(VALUE._col4), sum(VALUE._col5), sum(VALUE._col6), sum(VALUE._col7)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
index e9c016e..41fd334 100644
--- a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
@@ -1718,6 +1718,7 @@ RUN: Stage-1:MAPRED
 RUN: Stage-2:DEPENDENCY_COLLECTION
 RUN: Stage-0:MOVE
 RUN: Stage-3:STATS
+RUN: Stage-4:COLUMNSTATS
 PREHOOK: query: INSERT OVERWRITE TABLE part_table PARTITION (partitionId=2)
 SELECT key, value FROM src1 ORDER BY key, value
 PREHOOK: type: QUERY
@@ -1734,6 +1735,7 @@ RUN: Stage-1:MAPRED
 RUN: Stage-2:DEPENDENCY_COLLECTION
 RUN: Stage-0:MOVE
 RUN: Stage-3:STATS
+RUN: Stage-4:COLUMNSTATS
 PREHOOK: query: EXPLAIN
 SELECT count(*) FROM part_table x JOIN src1 y ON (x.key = y.key)
@@ -1759,14 +1761,14 @@ STAGE PLANS:
                 Map Operator Tree:
                     TableScan
                       alias: x
-                      Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                       Filter Operator
                         predicate: key is not null (type: boolean)
-                        Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: key (type: string)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 125 Data size: 2261 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -1775,15 +1777,15 @@ STAGE PLANS:
                               1 _col0 (type: string)
                             input vertices:
                               1 Map 3
-                            Statistics: Num rows: 137 Data size: 2487 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 72 Data size: 576 Basic stats: COMPLETE Column stats: COMPLETE
                             Group By Operator
                               aggregations: count()
                               mode: hash
                               outputColumnNames: _col0
-                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                               Reduce Output Operator
                                 sort order: 
-                                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                                 value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1813,10 +1815,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
index ad74867..36a728f 100644
--- a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
@@ -40,21 +40,21 @@ STAGE PLANS:
                 Map Operator Tree:
                     TableScan
                       alias: employee
-                      Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: department_id (type: int), gender (type: varchar(10)), education_level (type: int)
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), 0 (type: int)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2, _col3
-                          Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int)
                             sort order: ++++
                             Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int)
-                            Statistics: Num rows: 36 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -64,19 +64,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: varchar(10)), KEY._col2 (type: int), KEY._col3 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 18 Data size: 99 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count(_col0), count(_col1), count(_col2)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -85,10 +85,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -181,21 +181,21 @@ STAGE PLANS:
                 Map Operator Tree:
                     TableScan
                       alias: employee
-                      Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                       Select Operator
                         expressions: gender (type: varchar(10)), department_id (type: int), education_level (type: int)
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: 12 Data size: 66 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                        Group By Operator
                           keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), 0 (type: int)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2, _col3
-                          Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                             sort order: ++++
                             Map-reduce partition columns: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int)
-                            Statistics: Num rows: 60 Data size: 330 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -205,19 +205,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: varchar(10)), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 4)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 0)) THEN (1) ELSE (null) END (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 30 Data size: 165 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count(_col0), count(_col1), count(_col2), count(_col3), count(_col4)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint)
         Reducer 3 
            Execution mode: llap
@@ -226,14 +226,14 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: bigint), _col1 (type: bigint), _col0 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col3 (type: bigint), _col4 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/multi_insert.q.out b/ql/src/test/results/clientpositive/llap/multi_insert.q.out
index 58fc759..c4a7668 100644
--- a/ql/src/test/results/clientpositive/llap/multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/llap/multi_insert.q.out
@@ -29,6 +29,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -36,6 +38,10 @@ STAGE PLANS:
   Stage: Stage-2
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
@@ -57,6 +63,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
@@ -72,8 +91,51 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -91,6 +153,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -172,6 +248,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -179,6 +257,10 @@ STAGE PLANS:
   Stage: Stage-2
     Tez
 #### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -200,6 +282,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -215,8 +310,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -234,6 +372,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, 
value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -315,6 +467,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -322,6 +476,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -343,6 +501,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -358,8 +529,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -377,6 +591,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -458,6 +686,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -465,6 +695,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -486,6 +720,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE @@ -501,8 +748,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 9790 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -520,6 +810,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -601,6 +905,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -610,6 +916,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -648,6 +956,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -664,6 +985,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -681,6 +1045,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -755,6 +1133,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -764,6 +1144,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -802,6 +1184,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -818,6 +1213,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -835,6 +1273,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -909,6 +1361,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -918,6 +1372,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -956,6 +1412,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: 
COMPLETE @@ -972,6 +1441,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -989,6 +1501,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1063,6 +1589,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1072,6 +1600,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1110,6 +1640,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 73 Data size: 12994 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE @@ -1126,6 +1669,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1143,6 +1729,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1217,6 +1817,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1226,7 +1828,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here 
#### Vertices: Map 1 @@ -1249,6 +1853,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1260,9 +1877,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1282,6 +1912,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1293,8 +1936,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column 
stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1314,6 +2000,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1414,6 +2114,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1423,7 +2125,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1446,6 +2150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1457,9 +2174,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1479,6 +2209,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1490,8 +2233,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1511,6 +2297,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1611,6 +2411,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1620,7 +2422,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1643,6 +2447,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1654,9 +2471,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1676,6 +2506,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 
16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1687,8 +2530,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1708,6 +2594,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1808,6 +2708,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1817,7 +2719,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 3 <- Union 2 (CONTAINS) + Map 5 <- Union 2 (CONTAINS) + Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) + Reducer 
4 <- Union 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1840,6 +2744,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1851,9 +2768,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs - Map 3 + Map 5 Map Operator Tree: TableScan alias: src @@ -1873,6 +2803,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 59274 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE @@ -1884,8 +2827,51 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 19758 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -1905,6 +2891,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out index e5497d5..49b70a3 100644 --- a/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out +++ b/ql/src/test/results/clientpositive/llap/multi_insert_lateral_view.q.out @@ -49,6 +49,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -56,6 +58,10 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -83,6 +89,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select 
                     expressions: array((key + 1),(key + 2)) (type: array)
                     outputColumnNames: _col0
@@ -105,6 +124,19 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.src_lv1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Lateral View Forward
                     Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
@@ -126,6 +158,19 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.src_lv2
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                     Select Operator
                       expressions: array((key + 3),(key + 4)) (type: array)
                       outputColumnNames: _col0
@@ -148,8 +193,51 @@ STAGE PLANS:
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.src_lv2
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -167,6 +255,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -269,6 +371,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
@@ -278,7 +382,9 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -395,10 +501,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: sum(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
@@ -416,6 +550,34 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-3
     Dependency Collection
@@ -433,6 +595,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -517,6 +693,9 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-9 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-5, Stage-6, Stage-7
   Stage-1 depends on stages: Stage-4
   Stage-6 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -528,7 +707,10 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+        Reducer 4 <- Map 1 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -611,9 +793,37 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
              Forward
                Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
@@ -637,6 +847,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                Filter Operator
                  predicate: (KEY._col0 < 200) (type: boolean)
                  Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -658,6 +881,49 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv3
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-4
     Dependency Collection
@@ -675,6 +941,27 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv3
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -786,6 +1073,9 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-9 depends on stages: Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-5, Stage-6, Stage-7
   Stage-1 depends on stages: Stage-4
   Stage-6 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -797,8 +1087,11 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
         Reducer 4 <- Map 1 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Map 1 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -926,10 +1219,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: sum(DISTINCT KEY._col1:0._col0)
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
@@ -947,7 +1268,35 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
-        Reducer 4 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -968,6 +1317,34 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv3
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-4
     Dependency Collection
@@ -985,6 +1362,27 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv3
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -1136,6 +1534,10 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-12 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-13 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
   Stage-1 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-5
@@ -1149,8 +1551,12 @@ STAGE PLANS:
#### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
         Reducer 4 <- Map 1 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Map 1 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1271,10 +1677,38 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: sum(DISTINCT KEY._col1:0._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
@@ -1292,7 +1726,35 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
-        Reducer 4 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
             Execution mode: llap
             Reduce Operator Tree:
              Forward
@@ -1318,6 +1780,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv3
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                Filter Operator
                  predicate: (KEY._col1:0._col0 < 200) (type: boolean)
                  Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE
@@ -1339,6 +1814,49 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv4
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-5
     Dependency Collection
@@ -1356,6 +1874,34 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv3
+
+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv4
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out
index 1cc9d61..1c69964 100644
--- a/ql/src/test/results/clientpositive/llap/orc_analyze.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_analyze.q.out
@@ -281,7 +281,7 @@ Retention: 0
#### A masked pattern was here ####
 Table Type: MANAGED_TABLE
 Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}}
 	numFiles	1
 	numRows	100
 	rawDataSize	52600
@@ -728,7 +728,7 @@ Database: default
 Table: orc_create_people
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}}
 	numFiles	1
 	numRows	50
 	rawDataSize	21950
@@ -771,7 +771,7 @@ Database: default
 Table: orc_create_people
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}}
 	numFiles	1
 	numRows	50
 	rawDataSize	22050
@@ -1230,7 +1230,7 @@ Database: default
 Table: orc_create_people
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}}
 	numFiles	4
 	numRows	50
 	rawDataSize	21975
@@ -1273,7 +1273,7 @@ Database: default
 Table: orc_create_people
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\"}}
 	numFiles	4
 	numRows	50
 	rawDataSize	22043
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out
index ba29491..18573a6 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out
@@ -55,11 +55,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -78,8 +82,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -100,6 +140,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
     SELECT key, value, PMOD(HASH(key), 2) as part
     FROM src
@@ -136,6 +183,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -144,6 +192,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -162,8 +213,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1b
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-8
     Conditional Operator
@@ -193,6 +280,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1b
+
   Stage: Stage-4
     Tez
#### A masked pattern was here ####
@@ -265,6 +359,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -273,6 +368,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -291,8 +389,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1c
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-8
     Conditional Operator
@@ -322,6 +456,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1c
+
   Stage: Stage-4
     Tez
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
index dd5d1cb..08da6ac 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out
@@ -55,11 +55,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -78,8 +82,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -100,6 +140,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
     SELECT key, value, PMOD(HASH(key), 2) as part
    FROM src
@@ -136,6 +183,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -144,6 +192,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -162,8 +213,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1b
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-8
     Conditional Operator
@@ -193,6 +280,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1b
+
   Stage: Stage-4
     Tez
#### A masked pattern was here ####
@@ -265,6 +359,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -273,6 +368,9 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -291,8 +389,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge1c
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                    outputColumnNames: key, value, ds, part
+                    Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), part (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-8
     Conditional Operator
@@ -322,6 +456,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1c
+
   Stage: Stage-4
     Tez
#### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out
index c38852a..6c3bddf 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out
@@ -29,11 +29,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -52,8 +56,44 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orcfile_merge2a
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string)
+                    outputColumnNames: key, value, one, two, three
+                    Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: one (type: string), two (type: string), three (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        sort order: +++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -75,6 +115,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge2a
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge2a PARTITION (one='1', two, three)
     SELECT key, value, PMOD(HASH(key), 10) as two, 
    PMOD(HASH(value), 10) as three
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out
index fd79d9d..735ff50 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge3.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge3.q.out
@@ -59,31 +59,63 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: orcfile_merge3a
-                  Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                           output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                          serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                           name: default.orcfile_merge3b
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -101,6 +133,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge3b
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b
     SELECT key, value FROM orcfile_merge3a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out
index 6e2617b..dcb5b18 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge4.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge4.q.out
@@ -77,31 +77,63 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: orcfile_merge3a
-                  Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int), value (type: string)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 1000 Data size: 102000 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.mapred.TextInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                           name: default.orcfile_merge3b
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -119,6 +151,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge3b
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge3b
     SELECT key, value FROM orcfile_merge3a
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out
index 47096bb..6688f3b 100644
--- a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out
+++ b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out
@@ -31,11 +31,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
@@ -58,8 +62,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                         serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                         name: default.orc_merge5b
+                  Select Operator
+                    expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp)
+                    outputColumnNames: userid, string1, subtype, decimal1, ts
+                    Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                      Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
   Stage: Stage-2
     Dependency Collection
@@ -77,6 +109,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5b
+
 PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_merge5
@@ -122,6 +161,7 @@
Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -130,6 +170,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -152,8 +195,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -180,6 +251,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out index 3969a96..31ef896 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -58,8 +62,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 
(type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -80,6 +120,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -167,6 +214,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -175,6 +223,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -197,8 +248,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + 
outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -228,6 +315,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index d86d2dc..463ba7b 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +58,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num 
rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -201,6 +248,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -209,6 +257,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -227,8 +278,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) 
+ mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -257,6 +344,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index ba29491..18573a6 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -55,11 +55,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -78,8 +82,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -100,6 +140,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -136,6 +183,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -144,6 +192,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -162,8 +213,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), 
_col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -193,6 +280,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-4 Tez #### A masked pattern was here #### @@ -265,6 +359,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-5, Stage-4, Stage-7 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -273,6 +368,9 @@ STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -291,8 +389,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 250 Data size: 309250 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 205 Data size: 251945 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -322,6 +456,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, 
value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-4 Tez #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index e5101f4..ba18346 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -57,8 +61,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -76,6 +108,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 6fcb45a..c3ef446 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -31,11 +31,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 
Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -54,8 +58,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Execution mode: llap LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out index 8c6f97e..9c523f3 100644 --- a/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_predicate_pushdown.q.out @@ -133,19 +133,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data 
size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -156,10 +156,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -191,19 +191,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -214,10 +214,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -321,22 +321,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 
116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -347,10 +347,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -389,22 +389,22 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t < 0) and (UDFToInteger(t) > -2)) (type: boolean) - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 116 Data size: 34409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 116 Data size: 464 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -415,10 +415,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -589,18 +589,18 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -609,10 +609,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -655,18 +655,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -675,10 +675,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 8602 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic 
stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -786,18 +786,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -808,13 +808,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -865,18 +865,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - 
Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -887,13 +887,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1010,18 +1010,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_pred - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1032,14 +1032,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1048,13 +1048,13 @@ STAGE PLANS: 
Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1110,18 +1110,18 @@ STAGE PLANS: TableScan alias: orc_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1132,14 +1132,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1148,13 +1148,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, 
_col3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/parallel.q.out b/ql/src/test/results/clientpositive/llap/parallel.q.out index 7dba122..95ed8b8 100644 --- a/ql/src/test/results/clientpositive/llap/parallel.q.out +++ b/ql/src/test/results/clientpositive/llap/parallel.q.out @@ -29,6 +29,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -39,6 +41,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -93,6 +97,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -106,6 +123,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false 
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -123,6 +183,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out index aecbcfd..c3f6850 100644 --- a/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out +++ b/ql/src/test/results/clientpositive/llap/parquet_predicate_pushdown.q.out @@ -125,19 +125,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -148,10 +148,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,19 +183,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4188 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -206,10 +206,10 @@ STAGE PLANS: aggregations: sum(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -527,18 +527,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -547,10 +547,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -593,18 +593,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1049 Data size: 105941 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (s is not null and (s like 'bob%') and (not (t) IN (-1, -2, -3)) and t BETWEEN 25 AND 30) (type: boolean) - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), s (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: string) sort order: ++ - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -613,10 +613,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 29 Data size: 319 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 53 Data size: 5353 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -724,18 +724,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -746,13 +746,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -803,18 +803,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((d >= 10.0) and (d < 12.0) and (s like '%son') and (t > 0) and UDFToInteger(si) BETWEEN 300 AND 400 and (not (s like '%car%'))) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -825,13 +825,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -993,18 +993,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tbl_pred - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1015,14 +1015,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1031,13 +1031,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1093,18 +1093,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 118521 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((t > 10) and (t <> 101) and (d >= 10) and (d < 12.0) and (s like '%son') and (not (s like '%car%')) and (t > 0) and si BETWEEN 300 AND 400) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), d (type: double), s (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data 
size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Execution mode: llap @@ -1115,14 +1115,14 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: double) Reducer 3 @@ -1131,13 +1131,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: double), KEY.reducesinkkey0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1205,18 +1205,18 @@ STAGE PLANS: TableScan alias: tbl_pred filterExpr: ((f < 123.2) and (f > 1.92) and (f >= 9.99) and f BETWEEN 1.92 AND 123.2 and (i < 67627) and (i > 60627) and (i >= 60626) and i BETWEEN 60626 AND 67627 and (b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and b BETWEEN 4294967261 AND 4294967861) (type: boolean) - Statistics: Num rows: 1049 Data size: 11539 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 16784 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((f < 123.2) and (f > 1.92) and (f >= 9.99) and f BETWEEN 1.92 AND 123.2 and (i < 67627) and (i > 60627) and (i >= 60626) and i BETWEEN 60626 AND 67627 and (b < 4294967861) and (b > 4294967261) and (b >= 4294967260) and b BETWEEN 4294967261 AND 4294967861) (type: boolean) - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: f (type: float), i (type: int), b (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data 
size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Execution mode: llap @@ -1227,14 +1227,14 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 608 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: - - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: int), _col2 (type: bigint) Reducer 3 @@ -1243,13 +1243,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: int), VALUE._col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 3 - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out index 543d176..8d328b5 100644 --- a/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out +++ b/ql/src/test/results/clientpositive/llap/ppd_union_view.q.out @@ -166,13 +166,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -197,22 +197,22 @@ STAGE PLANS: alias: t1_old properties: insideView TRUE - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 181 Basic stats: COMPLETE 
Column stats: COMPLETE Select Operator expressions: keymap (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -229,7 +229,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"keymap":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns keymap,value @@ -276,22 +276,22 @@ STAGE PLANS: alias: t1_mapping properties: insideView TRUE - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: keymap is not null (type: boolean) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), keymap (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 273 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col0 (type: string) auto parallelism: true @@ -308,7 +308,7 @@ STAGE PLANS: partition values: ds 2011-10-13 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","keymap":"true"}} bucket_count -1 column.name.delimiter , columns key,keymap @@ -361,21 +361,21 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col1, _col3 Position of Big Table: 0 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-13' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL 
#### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -462,22 +462,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1_new - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 19 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -507,7 +507,7 @@ STAGE PLANS: partition values: ds 2011-10-15 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -622,13 +622,13 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col1 (type: string), '2011-10-15' (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 2 Data size: 188 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 544 Basic stats: COMPLETE Column stats: PARTIAL #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/ptf.q.out b/ql/src/test/results/clientpositive/llap/ptf.q.out index fbaf1e6..8448ed2 100644 --- a/ql/src/test/results/clientpositive/llap/ptf.q.out +++ b/ql/src/test/results/clientpositive/llap/ptf.q.out @@ -2956,6 +2956,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2966,8 +2968,10 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3069,9 +3073,37 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_4 + Select Operator + expressions: 
_col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reducer 4 Execution mode: llap Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 @@ -3106,7 +3138,7 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: sum_window_0 (type: bigint), _col5 (type: int) - Reducer 5 + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -3166,6 +3198,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.part_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -3183,6 +3243,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out index 7d1f526..ac09b63 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_createas1.q.out @@ -68,14 +68,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: rcfile_createas1a - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string), (hash(key) pmod 50) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index 2bb8e8a..edbe3d1 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -29,11 +29,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -52,8 +56,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, 
value, one, two, three + Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 710500 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 706500 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -75,6 +115,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge2a + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge2a PARTITION (one='1', two, three) SELECT key, value, PMOD(HASH(key), 10) as two, PMOD(HASH(value), 10) as three diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out index 11b0b48..8a81627 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge3.q.out @@ -59,31 +59,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +133,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out index 443014b..ac85b19 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge4.q.out @@ -59,31 +59,63 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: rcfile_merge3a - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 17624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.rcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -101,6 +133,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.rcfile_merge3b + PREHOOK: query: INSERT OVERWRITE TABLE rcfile_merge3b SELECT key, value FROM rcfile_merge3a PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out index 2cc6f5d..a6efef7 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -141,6 +142,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -178,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.bucket5_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket5_1 select * from src cluster by key PREHOOK: type: QUERY @@ -261,6 +305,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -268,6 +313,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -356,6 +402,61 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string) + outputColumnNames: aid, bid, t, ctime, etime, l, et, ds + Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(aid, 16), compute_stats(bid, 16), compute_stats(t, 16), compute_stats(ctime, 16), compute_stats(etime, 16), compute_stats(l, 16), compute_stats(et, 16) + keys: '2010-03-29' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3506 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: '2010-03-29' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '2010-03-29' (type: string) + Statistics: Num rows: 1 Data size: 3506 Basic stats: COMPLETE Column stats: PARTIAL + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: true + Reducer 3 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + keys: '2010-03-29' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3454 Basic stats: COMPLETE Column stats: PARTIAL +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-2 Dependency Collection @@ -391,3 +492,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: aid, bid, t, ctime, etime, l, et + Column Types: string, string, int, string, bigint, string, string + Table: default.complex_tbl_1 + Is Table Level Stats: false + diff --git a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out index e5b8d11..2955ee6 100644 --- a/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/reduce_deduplicate_distinct.q.out @@ -39,22 +39,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT key), count(DISTINCT name) keys: id (type: int), key (type: int), name (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) sort order: +++ Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -65,10 +65,10 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -118,21 +118,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_distinct_test - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), key (type: int), name (type: int) outputColumnNames: id, key, name - Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: id (type: int), key (type: int), name (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
-                        Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -142,18 +142,18 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count(DISTINCT _col1), count(DISTINCT _col2)
                   keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3
             Execution mode: llap
             Reduce Operator Tree:
@@ -162,10 +162,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -215,21 +215,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: count_distinct_test
-                  Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: id (type: int), key (type: int), name (type: int)
                     outputColumnNames: id, key, name
-                    Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: id (type: int), key (type: int), name (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
-                        Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -239,18 +239,18 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count(DISTINCT _col2), count(DISTINCT _col1)
                   keys: _col0 (type: int), _col2 (type: int), _col1 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3
             Execution mode: llap
             Reduce Operator Tree:
@@ -259,10 +259,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -312,21 +312,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: count_distinct_test
-                  Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: id (type: int), key (type: int), name (type: int)
                     outputColumnNames: id, key, name
-                    Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                       keys: id (type: int), key (type: int), name (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
-                        Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2
@@ -336,18 +336,18 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count(DISTINCT _col1), count(DISTINCT _col2)
                   keys: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                     sort order: +++
                     Map-reduce partition columns: _col0 (type: int)
-                    Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
@@ -356,10 +356,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -409,21 +409,21 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: count_distinct_test
-                 Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: id (type: int), key (type: int), name (type: int)
                    outputColumnNames: id, key, name
-                   Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 5 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: id (type: int), key (type: int), name (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                        sort order: +++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int)
-                       Statistics: Num rows: 5 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -433,18 +433,18 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int), KEY._col2 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(DISTINCT _col2), count(DISTINCT _col1)
                  keys: _col0 (type: int), _col2 (type: int), _col1 (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                 Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
                    sort order: +++
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
@@ -453,10 +453,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/sample1.q.out b/ql/src/test/results/clientpositive/llap/sample1.q.out
index 61a3a0f..e590b10 100644
--- a/ql/src/test/results/clientpositive/llap/sample1.q.out
+++ b/ql/src/test/results/clientpositive/llap/sample1.q.out
@@ -21,11 +21,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
@@ -73,6 +77,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                 Select Operator
+                   expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+                   outputColumnNames: key, value, dt, hr
+                   Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1, _col2, _col3
+                     Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       null sort order:
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: COMPLETE
+                       tag: -1
+                       value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                       auto parallelism: false
            Execution mode: llap
            LLAP IO: no inputs
            Path -> Alias:
@@ -128,6 +148,37 @@ STAGE PLANS:
                    name: default.srcpart
            Truncated Path -> Alias:
              /srcpart/ds=2008-04-08/hr=11 [s]
+        Reducer 2
+            Execution mode: llap
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types struct:struct:struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      TotalFiles: 1
+                      GatherStats: false
+                      MultiFileSpray: false

   Stage: Stage-2
     Dependency Collection
@@ -164,6 +215,14 @@ STAGE PLANS:
     Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value, dt, hr
+          Column Types: int, string, string, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
 PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s
 WHERE s.ds='2008-04-08' and s.hr='11'
diff --git a/ql/src/test/results/clientpositive/llap/sample10.q.out b/ql/src/test/results/clientpositive/llap/sample10.q.out
index a3da787..8552a3a 100644
--- a/ql/src/test/results/clientpositive/llap/sample10.q.out
+++ b/ql/src/test/results/clientpositive/llap/sample10.q.out
@@ -56,29 +56,29 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: srcpartbucket
-                 Statistics: Num rows: 40 Data size: 7600 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 40 Data size: 10760 Basic stats: COMPLETE Column stats: COMPLETE
                  GatherStats: false
                  Filter Operator
                    isSamplingPred: true
                    predicate: (((hash(key) & 2147483647) % 4) = 0) (type: boolean)
                    sampleDesc: BUCKET 1 OUT OF 4
-                   Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: ds (type: string)
                      outputColumnNames: ds
-                     Statistics: Num rows: 20 Data size: 3680 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 20 Data size: 5380 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        aggregations: count(1)
                        keys: ds (type: string)
                        mode: hash
                        outputColumnNames: _col0, _col1
-                       Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+                       Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: string)
                          null sort order: a
                          sort order: +
                          Map-reduce partition columns: _col0 (type: string)
-                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+                         Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                          tag: -1
                          value expressions: _col1 (type: bigint)
                          auto parallelism: true
@@ -96,7 +96,7 @@ STAGE PLANS:
                  ds 2008-04-08
                  hr 11
                properties:
-                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                  bucket_count 4
                  bucket_field_name key
                  column.name.delimiter ,
@@ -146,7 +146,7 @@ STAGE PLANS:
                  ds 2008-04-08
                  hr 12
                properties:
-                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                  bucket_count 4
                  bucket_field_name key
                  column.name.delimiter ,
@@ -196,7 +196,7 @@ STAGE PLANS:
                  ds 2008-04-09
                  hr 11
                properties:
-                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                  bucket_count 4
                  bucket_field_name key
                  column.name.delimiter ,
@@ -246,7 +246,7 @@ STAGE PLANS:
                  ds 2008-04-09
                  hr 12
                properties:
-                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                 COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                  bucket_count 4
                  bucket_field_name key
                  column.name.delimiter ,
@@ -301,12 +301,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+               Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  null sort order: a
                  sort order: +
-                 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
                  tag: -1
                  value expressions: _col1 (type: bigint)
                  auto parallelism: false
@@ -317,13 +317,13 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
              outputColumnNames: _col0, _col1
-             Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+             Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
                GlobalTableId: 0
#### A masked pattern was here ####
                NumFilesPerFileSink: 1
-               Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: PARTIAL
+               Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out
index 22c7745..c284b5c 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_nonvec_part_all_complex.q.out
@@ -431,14 +431,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: part_add_various_various_struct2
-                 Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
index d0cafaa..7d5d073 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_orc_vec_part_all_complex.q.out
@@ -437,14 +437,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: part_add_various_various_struct2
-                 Statistics: Num rows: 8 Data size: 4912 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out
index 12c7db4..591e54a 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_nonvec_part_all_complex.q.out
@@ -431,14 +431,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: part_add_various_various_struct2
-                 Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out
index 97270fc..89a2554 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vec_part_all_complex.q.out
@@ -437,14 +437,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: part_add_various_various_struct2
-                 Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out
index b35bccc..e8c3fe0 100644
--- a/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out
+++ b/ql/src/test/results/clientpositive/llap/schema_evol_text_vecrow_part_all_complex.q.out
@@ -437,14 +437,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: part_add_various_various_struct2
-                 Statistics: Num rows: 8 Data size: 939 Basic stats: COMPLETE Column stats: PARTIAL
+                 Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                  Select Operator
                    expressions: insert_num (type: int), part (type: int), b (type: string), s2 (type: struct)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                   Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 8 Data size: 32 Basic stats: COMPLETE Column stats: PARTIAL
+                     Statistics: Num rows: 8 Data size: 800 Basic stats: COMPLETE Column stats: PARTIAL
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/skewjoin.q.out b/ql/src/test/results/clientpositive/llap/skewjoin.q.out
index ff770f1..2e9a367 100644
--- a/ql/src/test/results/clientpositive/llap/skewjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/skewjoin.q.out
@@ -83,13 +83,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-1
     Tez
#### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
       Vertices:
         Map 1
@@ -111,7 +113,7 @@ STAGE PLANS:
                        Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
-        Map 3
+        Map 4
            Map Operator Tree:
                TableScan
                  alias: src2
@@ -154,6 +156,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.dest_j1
+                 Select Operator
+                   expressions: _col0 (type: int), _col1 (type: string)
+                   outputColumnNames: key, value
+                   Statistics: Num rows: 1219 Data size: 115805 Basic stats: COMPLETE Column stats: COMPLETE
+                   Group By Operator
+                     aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                     mode: hash
+                     outputColumnNames: _col0, _col1
+                     Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                     Reduce Output Operator
+                       sort order:
+                       Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -171,6 +201,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j1
+
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out b/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out
index 5d7f371..051a50d 100644
--- a/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out
+++ b/ql/src/test/results/clientpositive/llap/skewjoinopt15.q.out
@@ -88,19 +88,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), val (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -108,19 +108,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), val (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -134,10 +134,10 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -189,16 +189,16 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int), val (type: string)
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -206,16 +206,16 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int), val (type: string)
                    outputColumnNames: _col0, _col1
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
@@ -229,10 +229,10 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 1080 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -287,38 +287,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -330,15 +330,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(1)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -347,10 +347,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -396,32 +396,32 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -433,15 +433,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count(1)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -450,10 +450,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/smb_cache.q.out b/ql/src/test/results/clientpositive/llap/smb_cache.q.out
index 932b211..05162f6 100644
--- a/ql/src/test/results/clientpositive/llap/smb_cache.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_cache.q.out
@@ -211,26 +211,26 @@ Stage-0
 Stage-1
   Reducer 2 llap
   File Output Operator [FS_10]
-    Merge Join Operator [MERGEJOIN_15] (rows=579 width=8)
+    Merge Join Operator [MERGEJOIN_15] (rows=123 width=8)
       Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
     <-Map 1 [SIMPLE_EDGE] llap
       SHUFFLE [RS_6]
        PartitionCols:_col0
-       Select Operator [SEL_2] (rows=130 width=8)
+       Select Operator [SEL_2] (rows=130 width=4)
          Output:["_col0"]
-         Filter Operator [FIL_13] (rows=130 width=8)
+         Filter Operator [FIL_13] (rows=130 width=4)
           predicate:userid is not null
-          TableScan [TS_0] (rows=130 width=8)
-            default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+          TableScan [TS_0] (rows=130 width=4)
+            default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
    <-Map 3 [SIMPLE_EDGE] llap
      SHUFFLE [RS_7]
        PartitionCols:_col0
-       Select Operator [SEL_5] (rows=527 width=8)
+       Select Operator [SEL_5] (rows=527 width=4)
          Output:["_col0"]
-         Filter Operator [FIL_14] (rows=527 width=8)
+         Filter Operator [FIL_14] (rows=527 width=4)
           predicate:userid is not null
-          TableScan [TS_3] (rows=527 width=8)
-            default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+          TableScan [TS_3] (rows=527 width=4)
+            default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
 PREHOOK: query: select t1.userid,
@@ -291,26 +291,26 @@ Stage-0
 Stage-1
   Reducer 2 llap
   File Output Operator [FS_10]
-    Merge Join Operator [MERGEJOIN_15] (rows=579 width=8)
+    Merge Join Operator [MERGEJOIN_15] (rows=123 width=8)
      Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col0","_col1"]
    <-Map 1 [SIMPLE_EDGE] llap
      SHUFFLE [RS_6]
        PartitionCols:_col0
-       Select Operator [SEL_2] (rows=130 width=8)
+       Select Operator [SEL_2] (rows=130 width=4)
          Output:["_col0"]
-         Filter Operator [FIL_13] (rows=130 width=8)
+         Filter Operator [FIL_13] (rows=130 width=4)
           predicate:userid is not null
-          TableScan [TS_0] (rows=130 width=8)
-            default@bug_201_input_b,t1,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+          TableScan [TS_0] (rows=130 width=4)
+            default@bug_201_input_b,t1,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
    <-Map 3 [SIMPLE_EDGE] llap
      SHUFFLE [RS_7]
        PartitionCols:_col0
-       Select Operator [SEL_5] (rows=527 width=8)
+       Select Operator [SEL_5] (rows=527 width=4)
          Output:["_col0"]
-         Filter Operator [FIL_14] (rows=527 width=8)
+         Filter Operator [FIL_14] (rows=527 width=4)
           predicate:userid is not null
-          TableScan [TS_3] (rows=527 width=8)
-            default@bug_201_input_a,fa,Tbl:COMPLETE,Col:NONE,Output:["userid"]
+          TableScan [TS_3] (rows=527 width=4)
+            default@bug_201_input_a,fa,Tbl:COMPLETE,Col:COMPLETE,Output:["userid"]
 PREHOOK: query: select t1.userid,
diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
index df32ad4..a3c96bb 100644
--- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_14.q.out
@@ -65,38 +65,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -108,15 +108,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -125,10 +125,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -189,38 +189,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -233,18 +233,18 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0
-               Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  keys: _col0 (type: int)
                  mode: hash
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -254,11 +254,11 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
-                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint)
        Reducer 4
            Execution mode: llap
@@ -266,10 +266,10 @@ STAGE PLANS:
            Select Operator
              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: bigint)
              outputColumnNames: _col0, _col1
-             Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
              File Output Operator
                compressed: false
-               Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -345,38 +345,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 5
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -389,17 +389,17 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0
-               Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  keys: _col0 (type: int)
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: llap
            Reduce Operator Tree:
@@ -407,17 +407,17 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                 Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: count()
                    mode: hash
                    outputColumnNames: _col0
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order:
-                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: bigint)
        Reducer 4
            Execution mode: llap
@@ -426,10 +426,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -497,38 +497,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                   Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 6) (type: boolean)
-                   Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -540,15 +540,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -557,10 +557,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -632,38 +632,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: ((key < 8) and (key < 6)) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: ((key < 8) and (key < 6)) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -675,15 +675,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -692,10 +692,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -791,38 +791,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: ((key < 8) and (key < 6)) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: ((key < 8) and (key < 6)) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -834,15 +834,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -851,10 +851,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -938,38 +938,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 8) (type: boolean)
-                   Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (key < 8) (type: boolean)
-                   Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -981,15 +981,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
-               Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order:
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -998,10 +998,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1063,38 +1063,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: (key + 1) (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: (key + 1) (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1106,15 +1106,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1123,10 +1123,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1184,38 +1184,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num 
rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1227,15 +1227,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1244,10 +1244,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1301,38 +1301,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: 
COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1344,15 +1344,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 23 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1361,10 +1361,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1428,57 +1428,57 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1492,15 +1492,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 46 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1509,10 +1509,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1592,38 +1592,38 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1635,15 +1635,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -1652,10 +1652,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out index df89454..2a8f6be 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_15.q.out @@ -55,22 +55,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic 
stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string) auto parallelism: true @@ -85,7 +85,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -108,7 +108,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,22 +135,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string) auto parallelism: true @@ -165,7 +165,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -188,7 +188,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -223,12 +223,12 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -241,16 +241,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: 
int), VALUE._col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1900 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -369,22 +369,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col2 (type: string) auto parallelism: true @@ -399,7 +399,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -422,7 +422,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -449,22 +449,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, 
_col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col2 (type: string) auto parallelism: true @@ -479,7 +479,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -502,7 +502,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -537,12 +537,12 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -555,16 +555,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -631,22 +631,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col2 (type: string) auto parallelism: true @@ -661,7 +661,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -684,7 +684,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -711,22 +711,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and key2 is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col2 (type: string) auto parallelism: true @@ -741,7 +741,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -764,7 +764,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -799,12 +799,12 @@ STAGE PLANS: 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -817,16 +817,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 1188 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -893,22 +893,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: int) auto parallelism: true @@ -923,7 +923,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -946,7 +946,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -973,22 +973,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), key2 (type: int), value (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col2 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: int), _col2 (type: string) - Statistics: Num rows: 500 Data size: 7218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: int) auto parallelism: true @@ -1003,7 +1003,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1026,7 +1026,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -1061,12 +1061,12 @@ STAGE PLANS: 1 _col0 (type: int), _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Position of Big Table: 0 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 10 TopN Hash Memory Usage: 0.1 @@ -1079,16 +1079,16 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 550 Data size: 7939 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 
10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out index 3ffef3b..40caf52 100644 --- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out +++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_17.q.out @@ -193,133 +193,133 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: e - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: f - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 9 Map Operator Tree: TableScan alias: g - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE 
Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -341,15 +341,15 @@ STAGE PLANS: 4 _col0 (type: int) 5 _col0 (type: int) 6 _col0 (type: int) - Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -358,10 +358,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -440,59 +440,59 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: c - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: d - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column 
stats: COMPLETE
Map Operator Tree:
TableScan
alias: e
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: f
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: g
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Left Outer Join0 to 1
@@ -509,15 +509,15 @@ STAGE PLANS:
4 _col0 (type: int)
5 _col0 (type: int)
6 _col0 (type: int)
- Statistics: Num rows: 66 Data size: 462 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 214 Data size: 1712 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
Reducer 2
@@ -527,10 +527,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -611,67 +611,67 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: d
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: e
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: f
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: g
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: h
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Left Outer Join0 to 1
@@ -690,15 +690,15 @@ STAGE PLANS:
5 _col0 (type: int)
6 _col0 (type: int)
7 _col0 (type: int)
- Statistics: Num rows: 77 Data size: 539 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 357 Data size: 2856 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
mode: hash
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint)
Execution mode: llap
Reducer 2
@@ -708,10 +708,10 @@ STAGE PLANS:
aggregations: count(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -822,131 +822,131 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: d
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: e
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: f
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: g
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: h
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: i
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: j
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: k
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: l
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: m
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: n
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: o
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: p
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE
Merge Join Operator
condition map:
Left Outer Join0 to 1
@@ -982,76 +982,76 @@ STAGE PLANS:
8 _col0 (type: int)
9 _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 165 Data size: 1155 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 165 Data size: 1155 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 21268 Data size: 1977924 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
Map 18
Map Operator Tree:
TableScan
alias: q
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 19
Map Operator Tree:
TableScan
alias: r
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 20
Map Operator Tree:
TableScan
alias: s
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Map 21
Map Operator Tree:
TableScan
alias: t
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: llap
LLAP IO: no inputs
Reducer 2
@@ -1070,10 +1070,10 @@ STAGE PLANS:
3 _col0 (type: int)
4 _col0 (type: int)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 726 Data size: 5082 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 164104 Data size: 15261672 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out
index e2d5e4e..5fbac77 100644
--- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_18.q.out
@@ -43,6 +43,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -53,14 +54,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -87,6 +88,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
PREHOOK: type: QUERY
@@ -220,6 +228,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -227,25 +236,26 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key = 238) (type: boolean)
- Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 238 (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -255,15 +265,51 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '2' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 1 Data size: 1053 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: struct), _col2 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1045 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -283,6 +329,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238
PREHOOK: type: QUERY
@@ -363,6 +416,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -373,14 +427,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -407,6 +461,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2')
SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2'
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out
index c3b29ea..bb10c12 100644
--- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_19.q.out
@@ -43,6 +43,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -53,14 +54,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -87,6 +88,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1'
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out
index 2c1ee96..823ba1c 100644
--- a/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out
+++ b/ql/src/test/results/clientpositive/llap/smb_mapjoin_6.q.out
@@ -67,52 +67,54 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size:
4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -126,15 +128,43 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.smb_join_results
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: k1, v1, k2, v2
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -152,6 +182,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, v1, k2, v2
+ Column Types: int, string, int, string
+ Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key
PREHOOK: type: QUERY
@@ -1249,52 +1286,54 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -1308,15 +1347,43 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.smb_join_results
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: k1, v1, k2, v2
+ Statistics: Num rows: 1275 Data size: 242250 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -1334,6 +1401,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, v1, k2, v2
+ Column Types: int, string, int, string
+ Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key
PREHOOK: type: QUERY
@@ -2447,52 +2521,54 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -2506,15 +2582,43 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.smb_join_results
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: k1, v1, k2, v2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -2532,6 +2636,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, v1, k2, v2
+ Column Types: int, string, int, string
+ Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000
PREHOOK: type: QUERY
@@ -2561,52 +2672,54 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -2620,15 +2733,43 @@ STAGE PLANS:
0 _col0 (type: int)
1 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.smb_join_results
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: k1, v1, k2, v2
+ Statistics: Num rows: 1 Data size: 190 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -2646,6 +2787,13 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, v1, k2, v2
+ Column Types: int, string, int, string
+ Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000
PREHOOK: type: QUERY
@@ -2684,19 +2832,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -2704,19 +2852,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -2724,19 +2872,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: c
- Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (key > 1000) (type: boolean)
- Statistics:
Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -2752,10 +2900,10 @@ STAGE PLANS:
1 _col0 (type: int)
2 _col0 (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 365 Data size: 3513 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 285 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
index c73e0d2..218c190 100644
--- a/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
+++ b/ql/src/test/results/clientpositive/llap/sqlmerge.q.out
@@ -29,6 +29,8 @@ STAGE DEPENDENCIES:
Stage-5 depends on stages: Stage-4
Stage-0 depends on stages: Stage-5
Stage-6 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
+ Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9
Stage-2 depends on stages: Stage-5
Stage-7 depends on stages: Stage-2
Stage-3 depends on stages: Stage-5
@@ -41,11 +43,12 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 5 <- Reducer 2 (SIMPLE_EDGE)
Reducer 6 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -61,7 +64,7 @@ STAGE PLANS:
value expressions: ROW__ID (type: struct)
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 7
+ Map 8
Map Operator Tree:
TableScan
alias: s
@@ -197,6 +200,26 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.merge_tmp_table
+ Select Operator
+ expressions: _col0 (type: int)
+ outputColumnNames: val
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(val, 16)
+ mode: complete
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 484 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Reducer 6
Execution mode: llap
Reduce Operator Tree:
@@ -213,6 +236,34 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 7
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-5
Dependency Collection
@@ -230,6 +281,20 @@ STAGE PLANS:
Stage: Stage-6
Stats-Aggr Operator
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, int
+ Table: default.acidtbl
+
+ Stage: Stage-11
+ Column Stats Work
+ Column Stats Desc:
+ Columns: val
+ Column Types: int
+ Table: default.merge_tmp_table
+
Stage: Stage-2
Move Operator
tables:
@@ -280,14 +345,16 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
+ Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -302,7 +369,7 @@ STAGE PLANS:
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Execution mode: llap
LLAP IO: may be used (ACID table)
- Map 4
+ Map 5
Map Operator Tree:
TableScan
alias: s
@@ -354,6 +421,34 @@ STAGE PLANS:
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.acidtbl
Write Type: INSERT
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: a, b
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(a, 16), compute_stats(b, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 4
+ Execution mode: llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -371,3 +466,10 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: a, b
+ Column Types: int, int
+ Table: default.acidtbl
+
diff --git a/ql/src/test/results/clientpositive/llap/stats11.q.out b/ql/src/test/results/clientpositive/llap/stats11.q.out
index c71a71e..8d08062 100644
--- a/ql/src/test/results/clientpositive/llap/stats11.q.out
+++ b/ql/src/test/results/clientpositive/llap/stats11.q.out
@@ -307,13 +307,15 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -392,7 +394,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -519,6 +521,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -555,6 +604,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select /*+mapjoin(b)*/ a.key, a.value, b.value
from srcbucket_mapjoin a join srcbucket_mapjoin_part b
@@ -673,13 +730,15 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Edges:
- Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+ Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -758,7 +817,7 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
- Map 3
+ Map 4
Map Operator Tree:
TableScan
alias: b
@@ -864,7 +923,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -885,6 +944,53 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6877 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Reducer 3
+ Execution mode: llap
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-2
Dependency Collection
@@ -898,7 +1004,7 @@ STAGE PLANS:
input format:
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -921,6 +1027,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out index 57aaf55..1871db0 100644 --- a/ql/src/test/results/clientpositive/llap/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/llap/stats_only_null.q.out @@ -73,56 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -133,56 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 
(CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 3004e36..685360d 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -970,14 +970,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -1012,10 +1012,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 58e78c4..0313b88 100644 --- 
a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -5026,19 +5026,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: i is not null (type: boolean) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint) sort order: + Map-reduce partition columns: UDFToLong(_col0) (type: bigint) - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5070,10 +5070,10 @@ STAGE PLANS: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5234,16 +5234,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: UDFToLong(_col0) (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: UDFToLong(_col0) (type: bigint), _col1 (type: int) - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -5251,21 +5251,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort 
order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5279,10 +5279,10 @@ STAGE PLANS: 0 UDFToLong(_col0) (type: bigint), _col1 (type: int) 1 _col0 (type: bigint), _col1 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5295,16 +5295,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: bigint), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index b6fb37a..65d83ee 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -97,19 +97,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (p_size is not null and p_brand is not null) (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: int) sort order: + Map-reduce partition columns: _col5 (type: int) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -117,44 +117,44 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_size is not null (type: boolean) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_size (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string) outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -167,12 +167,12 @@ STAGE PLANS: 0 _col5 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -184,10 +184,10 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num 
rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 491 Data size: 1969 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -199,12 +199,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -273,19 +273,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -293,39 +293,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 2604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string) outputColumnNames: p_name - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_name) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -333,17 +333,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -353,16 +353,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -374,10 +374,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: 
string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -389,12 +389,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap @@ -406,17 +406,17 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 - Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -428,12 +428,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -441,10 +441,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Stage: Stage-0 @@ -488,19 +488,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -508,39 +508,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 2604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_type (type: string) outputColumnNames: p_type - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_type) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 
Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -548,17 +548,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -568,16 +568,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -589,10 +589,10 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -604,12 +604,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2376 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 
(type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap @@ -621,17 +621,17 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 - Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -643,12 +643,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -656,10 +656,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Stage: Stage-0 @@ -729,19 +729,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -749,39 +749,39 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_brand (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string) outputColumnNames: p_name - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(p_name) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -789,20 +789,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 2604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data 
size: 168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -812,16 +812,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -833,10 +833,10 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -848,12 +848,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2970 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 17 Data size: 2079 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2970 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) Reducer 4 Execution mode: llap @@ -865,17 +865,17 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 - Statistics: Num rows: 18 Data size: 2286 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 5 Data size: 2990 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1524 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -887,12 +887,12 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -900,10 +900,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Stage: Stage-0 @@ -946,17 +946,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -1001,24 
+1001,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_brand (type: string) outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2944 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1031,12 +1031,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2975 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: string) sort order: + Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2975 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) Reducer 3 Execution mode: llap @@ -1048,19 +1048,19 @@ STAGE PLANS: 0 _col1 (type: string) 1 UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 5 Data size: 3675 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 3272 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: ((_col9 = 0) or (_col12 is null and _col1 is not null and (_col10 >= _col9))) (type: boolean) - Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1308 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1308 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 2 Data size: 1470 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1308 
Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 4 
             Execution mode: llap
@@ -1072,10 +1072,10 @@ STAGE PLANS:
                   0 _col3 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 35 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 1438 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 35 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 1438 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1179,19 +1179,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_name is not null (type: boolean)
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col1 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1199,20 +1199,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_name is not null (type: boolean)
-                    Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 31 Data size: 2604 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: p_name (type: string)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 6 
@@ -1244,10 +1244,10 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -1259,10 +1259,10 @@ STAGE PLANS:
                   0 
                   1 
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 17 Data size: 1875 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 17 Data size: 1875 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1274,12 +1274,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: string)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1361,19 +1361,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_size is not null (type: boolean)
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col5 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col5 (type: int)
-                        Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                         value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1381,20 +1381,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_size is not null (type: boolean)
-                    Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: p_size (type: int)
                       mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 6 
@@ -1426,10 +1426,10 @@ STAGE PLANS:
                   0 _col5 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 447 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -1441,10 +1441,10 @@ STAGE PLANS:
                   0 
                   1 
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 447 Data size: 2237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5 Data size: 2895 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 447 Data size: 2237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2895 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1456,12 +1456,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 407 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1517,19 +1517,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_name is not null (type: boolean)
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1537,20 +1537,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_name is not null (type: boolean)
-                    Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 31 Data size: 2604 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: p_name (type: string)
                      mode: hash
                      outputColumnNames: _col0
-                      Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 6 
@@ -1582,10 +1582,10 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 17 Data size: 1790 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -1597,17 +1597,17 @@ STAGE PLANS:
                   0 
                   1 
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10
-                Statistics: Num rows: 17 Data size: 1807 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 2316 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
                   predicate: _col10 is null (type: boolean)
-                  Statistics: Num rows: 8 Data size: 850 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 1158 Basic stats: COMPLETE Column stats: NONE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 8 Data size: 850 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 1158 Basic stats: COMPLETE Column stats: NONE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 8 Data size: 850 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 1158 Basic stats: COMPLETE Column stats: NONE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1619,12 +1619,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: string)
                sort order: +
                Map-reduce partition columns: _col0 (type: string)
-                Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1714,16 +1714,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col4 (type: string), _col1 (type: string)
                      sort order: ++
                      Map-reduce partition columns: _col4 (type: string), _col1 (type: string)
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1913,12 +1913,12 @@ STAGE PLANS:
                   0 _col4 (type: string), _col1 (type: string)
                   1 _col1 (type: string), _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col4 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col4 (type: string)
-                  Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -1930,12 +1930,12 @@ STAGE PLANS:
                   0 _col4 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13
-                Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col3 (type: string), _col4 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col3 (type: string), _col4 (type: string)
-                  Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint)
         Reducer 4 
             Execution mode: llap
@@ -1947,17 +1947,17 @@ STAGE PLANS:
                   0 _col3 (type: string), _col4 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16
-                Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2086,19 +2086,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: p_name is not null (type: boolean)
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col1 (type: string)
-                        Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4 Data size: 2312 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2156,12 +2156,12 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col3 (type: string), _col4 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col3 (type: string), _col4 (type: string)
-                  Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -2173,10 +2173,10 @@ STAGE PLANS:
                   0 _col3 (type: string), _col4 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 15 Data size: 1903 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2266,16 +2266,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col4 (type: string), _col1 (type: string), _col6 (type: string)
                      sort order: +++
                      Map-reduce partition columns: _col4 (type: string), _col1 (type: string), _col6 (type: string)
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2469,12 +2469,12 @@ STAGE PLANS:
                   0 _col4 (type: string), _col1 (type: string), _col6 (type: string)
                   1 _col1 (type: string), _col0 (type: string), _col2 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col4 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col4 (type: string)
-                  Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -2486,12 +2486,12 @@ STAGE PLANS:
                   0 _col4 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14
-                Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col4 (type: string), _col3 (type: string)
                   sort order: ++
                   Map-reduce partition columns: _col4 (type: string), _col3 (type: string)
-                  Statistics: Num rows: 5 Data size: 3939 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col13 (type: bigint), _col14 (type: bigint)
         Reducer 4 
             Execution mode: llap
@@ -2503,17 +2503,17 @@ STAGE PLANS:
                   0 _col4 (type: string), _col3 (type: string)
                   1 _col1 (type: string), _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col13, _col14, _col17
-                Statistics: Num rows: 5 Data size: 4332 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col13 = 0)) THEN (false) WHEN (_col13 is null) THEN (false) WHEN (_col17 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col14 < _col13)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 3 Data size: 2599 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2641,16 +2641,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: string), _col4 (type: string)
                      sort order: ++
                      Map-reduce partition columns: _col1 (type: string), _col4 (type: string)
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2747,12 +2747,12 @@ STAGE PLANS:
                   0 _col1 (type: string), _col4 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col5 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col5 (type: int)
-                  Statistics: Num rows: 14 Data size: 3217 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string)
         Reducer 3 
            Execution mode: llap
@@ -2764,12 +2764,12 @@ STAGE PLANS:
                   0 _col5 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13
-                Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col3 (type: string), _col5 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col3 (type: string), _col5 (type: int)
-                  Statistics: Num rows: 15 Data size: 3538 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 594 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col12 (type: bigint), _col13 (type: bigint)
         Reducer 4 
            Execution mode: llap
@@ -2781,17 +2781,17 @@ STAGE PLANS:
                   0 _col3 (type: string), _col5 (type: int)
                   1 _col0 (type: string), _col1 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col12, _col13, _col16
-                Statistics: Num rows: 16 Data size: 3891 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col12 = 0)) THEN (false) WHEN (_col12 is null) THEN (false) WHEN (_col16 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col13 < _col12)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 598 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 8 Data size: 1945 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 578 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2889,16 +2889,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: part_null
-                  Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                    Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: string), _col4 (type: string)
                      sort order: ++
                      Map-reduce partition columns: _col1 (type: string), _col4 (type: string)
-                      Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
             Execution mode: llap
             LLAP IO: no inputs
@@ -2952,10 +2952,10 @@ STAGE PLANS:
                   0 _col1 (type: string), _col4 (type: string)
                   1 _col0 (type: string), _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
index f28fcab..3b3dc64 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out
@@ -5872,14 +5872,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c1 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order: 
-                      Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -5887,19 +5887,19 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c1 (type: int)
                    outputColumnNames: c1
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(), count(c1)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint), _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -5907,17 +5907,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: c1 (type: int)
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -5930,12 +5930,12 @@ STAGE PLANS:
                   0 
                   1 
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 4 Data size: 381 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -5947,17 +5947,17 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col4
-                Statistics: Num rows: 4 Data size: 419 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 2 Data size: 209 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -5969,10 +5969,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 sort order: 
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                 value expressions: _col0 (type: bigint), _col1 (type: bigint)
         Reducer 7 
            Execution mode: llap
@@ -5981,16 +5981,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: _col0 (type: int), true (type: boolean)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: boolean)
 
   Stage: Stage-0
@@ -6037,16 +6037,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c1 (type: int), c2 (type: char(100))
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: char(100))
                      sort order: +
                      Map-reduce partition columns: _col1 (type: char(100))
-                      Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6054,33 +6054,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: c2 (type: char(100))
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: char(100))
                      sort order: +
                      Map-reduce partition columns: _col0 (type: char(100))
-                      Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 4 
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c1 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: UDFToDouble(_col0) (type: double)
                      sort order: +
                      Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                      Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6088,33 +6088,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 352 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: c2 (type: char(100))
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: char(100))
                      sort order: +
                      Map-reduce partition columns: _col0 (type: char(100))
-                      Statistics: Num rows: 4 Data size: 313 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 9 
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c1 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: UDFToDouble(_col0) (type: double)
                      sort order: +
                      Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                      Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6128,17 +6128,17 @@ STAGE PLANS:
                   0 UDFToDouble(_col0) (type: double)
                   1 UDFToDouble(_col0) (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   keys: _col0 (type: int), _col1 (type: char(100))
                   mode: hash
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: int), _col1 (type: char(100))
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100))
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 11 
            Execution mode: llap
            Reduce Operator Tree:
@@ -6146,19 +6146,19 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: char(100))
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
               Filter Operator
                 predicate: _col0 is not null (type: boolean)
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: char(100)), true (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col1 (type: char(100))
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100))
-                    Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col2 (type: boolean)
         Reducer 13 
            Execution mode: llap
@@ -6167,12 +6167,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: char(100))
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: UDFToDouble(_col0) (type: double)
                sort order: +
                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col0 (type: char(100))
         Reducer 2 
            Execution mode: llap
@@ -6184,12 +6184,12 @@ STAGE PLANS:
                   0 _col1 (type: char(100))
                   1 _col0 (type: char(100))
                 outputColumnNames: _col0, _col1, _col3, _col4
-                Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: char(100))
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: int), _col1 (type: char(100))
-                  Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col3 (type: bigint), _col4 (type: bigint)
         Reducer 3 
            Execution mode: llap
@@ -6201,17 +6201,17 @@ STAGE PLANS:
                   0 _col0 (type: int), _col1 (type: char(100))
                   1 _col0 (type: int), _col1 (type: char(100))
                 outputColumnNames: _col0, _col3, _col4, _col7
-                Statistics: Num rows: 4 Data size: 378 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 2 Data size: 189 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -6226,18 +6226,18 @@ STAGE PLANS:
                   0 UDFToDouble(_col0) (type: double)
                   1 UDFToDouble(_col0) (type: double)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count(), count(_col0)
                   keys: _col1 (type: char(100))
                   mode: hash
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: char(100))
                     sort order: +
                     Map-reduce partition columns: _col0 (type: char(100))
-                    Statistics: Num rows: 3 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 6 
            Execution mode: llap
@@ -6247,12 +6247,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: char(100))
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: char(100))
                sort order: +
                Map-reduce partition columns: _col0 (type: char(100))
-                Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 8 
            Execution mode: llap
@@ -6261,12 +6261,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: char(100))
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: UDFToDouble(_col0) (type: double)
                sort order: +
                Map-reduce partition columns: UDFToDouble(_col0) (type: double)
-                Statistics: Num rows: 2 Data size: 156 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col0 (type: char(100))
 
   Stage: Stage-0
@@ -6358,16 +6358,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: a (type: int), b (type: int)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6375,21 +6375,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: b is not null (type: boolean)
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(), count(a)
                      keys: b (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6397,20 +6397,20 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: b is not null (type: boolean)
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: b (type: int), a (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                       Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -6423,12 +6423,12 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col3, _col4
-                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int), _col1 (type: int)
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                  Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col3 (type: bigint), _col4 (type: bigint)
         Reducer 3 
            Execution mode: llap
@@ -6440,17 +6440,17 @@ STAGE PLANS:
                   0 _col0 (type: int), _col1 (type: int)
                   1 _col1 (type: int), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col3, _col4, _col7
-                Statistics: Num rows: 3 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 2 Data size: 6 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -6463,12 +6463,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
                Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 7 
            Execution mode: llap
@@ -6477,23 +6477,23 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: _col1 (type: int), _col0 (type: int)
                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
               Filter Operator
                 predicate: _col0 is not null (type: boolean)
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), _col1 (type: int), true (type: boolean)
                  outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col1 (type: int), _col0 (type: int)
                    sort order: ++
                    Map-reduce partition columns: _col1 (type: int), _col0 (type: int)
-                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col2 (type: boolean)
 
   Stage: Stage-0
@@ -6584,16 +6584,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: fixob
-                  Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: i (type: int), j (type: int)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6601,21 +6601,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t7
-                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: j is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(), count(i)
                      keys: j (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6623,37 +6623,37 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t7
-                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: j is not null (type: boolean)
-                    Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: i (type: int), j (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                       Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 8 
             Map Operator Tree:
                 TableScan
                   alias: fixob
-                  Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: j (type: int)
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -6668,17 +6668,17 @@ STAGE PLANS:
                   1 _col0 (type: int)
                   2 _col3 (type: int)
                 outputColumnNames: _col0, _col1, _col3, _col4, _col7
-                Statistics: Num rows: 4 Data size: 19 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int), _col1 (type: int)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -6691,12 +6691,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
                Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 6 
            Execution mode: llap
@@ -6705,16 +6705,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: _col0 (type: int), true (type: boolean)
                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col2 (type: boolean)
         Reducer 7 
            Execution mode: llap
@@ -6726,12 +6726,12 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col2, _col3
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col3 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col3 (type: int)
-                  Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col2 (type: boolean)
         Reducer 9 
            Execution mode: llap
@@ -6740,12 +6740,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
                Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
     Fetch Operator
@@ -6820,16 +6820,16 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: i (type: int), j (type: int)
                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col1 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col1 (type: int)
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: int)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6837,21 +6837,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: j is not null (type: boolean)
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(), count(i)
                      keys: j (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: bigint), _col2 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -6859,37 +6859,37 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: j is not null (type: boolean)
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: i (type: int), j (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                       Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 8 
             Map Operator Tree:
                 TableScan
                   alias: t
-                  Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: j (type: int)
                    mode: hash
                    outputColumnNames: _col0
-                    Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                      Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
         Reducer 2 
@@ -6904,17 +6904,17 @@ STAGE PLANS:
                   1 _col0 (type: int)
                   2 _col3 (type: int)
                 outputColumnNames: _col0, _col1, _col3, _col4, _col7
-                Statistics: Num rows: 6 Data size: 22 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                 Filter Operator
                   predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col1 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean)
-                  Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int)
                    outputColumnNames: _col0
-                    Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                      Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -6927,12 +6927,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
              Reduce Output Operator
                key expressions: _col0 (type: int)
                sort order: +
                Map-reduce partition columns: _col0 (type: int)
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                value expressions: _col1 (type: bigint), _col2 (type: bigint)
         Reducer 6 
            Execution mode: llap
@@ -6941,16 +6941,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
              Select Operator
                expressions: _col0 (type: int), true (type: boolean)
                outputColumnNames: _col0, _col2
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
               Reduce Output Operator
                 key expressions: _col0 (type: int)
                 sort order: +
                 Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+
Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 7 Execution mode: llap @@ -6962,12 +6962,12 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col2, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: int) sort order: + Map-reduce partition columns: _col3 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Reducer 9 Execution mode: llap @@ -6976,12 +6976,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -7023,16 +7023,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -7040,21 +7040,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) keys: j (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -7062,20 +7062,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 
Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: j is not null (type: boolean) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int), j (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7088,12 +7088,12 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3, _col4 - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 3 Data size: 11 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: bigint) Reducer 3 Execution mode: llap @@ -7105,17 +7105,17 @@ STAGE PLANS: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) outputColumnNames: _col0, _col3, _col4, _col7 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col3 = 0)) THEN (false) WHEN (_col3 is null) THEN (false) WHEN (_col7 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col4 < _col3)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7128,12 +7128,12 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), 
_col2 (type: bigint) Reducer 7 Execution mode: llap @@ -7142,19 +7142,19 @@ STAGE PLANS: keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int) sort order: ++ Map-reduce partition columns: _col0 (type: int), _col1 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: boolean) Stage: Stage-0 @@ -7196,14 +7196,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), j (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -7211,19 +7211,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -7231,17 +7231,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: 
COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7254,12 +7254,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 Execution mode: llap @@ -7271,17 +7271,17 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col5 - Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col2 = 0) or (_col5 is null and _col1 is not null and (_col3 >= _col2))) (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7293,10 +7293,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 7 Execution mode: llap @@ -7305,16 +7305,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 @@ -7358,14 +7358,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -7373,19 +7373,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: i - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(), count(i) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -7393,17 +7393,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: i (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -7416,12 +7416,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 Execution mode: llap @@ -7433,17 +7433,17 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col4 - Statistics: Num rows: 3 Data size: 67 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data 
size: 72 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col1 = 0) or (_col4 is null and _col0 is not null and (_col2 >= _col1))) (type: boolean) - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7455,10 +7455,10 @@ STAGE PLANS: aggregations: count(VALUE._col0), count(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 7 Execution mode: llap @@ -7467,16 +7467,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), true (type: boolean) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Stage: Stage-0 diff --git a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out index 250c93d..4bb73f9 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_scalar.q.out @@ -113,19 +113,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int) outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -133,19 +133,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_size (type: int) outputColumnNames: p_size - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -161,17 +161,17 @@ STAGE PLANS: 1 2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 18304 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(_col5) > _col10) (type: boolean) - Statistics: Num rows: 8 Data size: 5632 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 5016 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 8 Data size: 5632 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 8 Data size: 5632 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 8 Data size: 4952 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -183,22 +183,22 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -206,10 +206,10 @@ STAGE PLANS: aggregations: avg(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 76 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Stage: Stage-0 @@ -407,20 +407,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is null (type: boolean) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -428,15 +428,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_name is null (type: boolean) - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -451,14 +451,14 @@ STAGE PLANS: 1 2 outputColumnNames: _col0, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 498 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), null (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 16 Data size: 11084 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 582 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -470,15 +470,15 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -553,17 +553,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tnull - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: int) Execution mode: llap @@ -593,17 +593,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 26 Data size: 16250 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col10 is null (type: boolean) - Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 623 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 13 Data size: 8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 13 Data size: 
8125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -645,13 +645,13 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 5 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Stage: Stage-0 @@ -736,19 +736,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int) outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -756,19 +756,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string) outputColumnNames: p_name - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_name) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -784,10 +784,10 @@ STAGE PLANS: 1 2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 21112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 26 Data size: 21112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column 
stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -799,22 +799,22 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -822,15 +822,15 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -1824,41 +1824,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: 
TableScan alias: part_null - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 179 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct) Execution mode: llap LLAP IO: no inputs @@ -1874,17 +1874,17 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (UDFToDouble(_col5) > CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END) (type: boolean) - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1896,25 +1896,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 
Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -1923,16 +1923,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: double), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 115 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double), _col1 (type: boolean) Stage: Stage-0 @@ -1997,19 +1997,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_size (type: int) outputColumnNames: p_size - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_size) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: llap LLAP IO: no inputs @@ -2017,41 +2017,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 31 
Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -2059,19 +2059,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int) outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2082,22 +2082,22 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 12 Execution mode: llap Reduce Operator Tree: @@ -2105,10 +2105,10 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: 
_col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 2 Execution mode: llap @@ -2122,10 +2122,10 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), _col12 (type: boolean) Reducer 3 Execution mode: llap @@ -2137,10 +2137,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 57 Data size: 35919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 57 Data size: 35919 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: int), _col12 (type: boolean) Reducer 4 Execution mode: llap @@ -2152,17 +2152,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col15 - Statistics: Num rows: 57 Data size: 36204 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: _col5 BETWEEN CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END AND _col15 (type: boolean) - Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 631 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 3810 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2174,25 +2174,25 @@ STAGE PLANS: keys: 
KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reducer 8 Execution mode: llap Reduce Operator Tree: @@ -2201,16 +2201,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: boolean) Stage: Stage-0 @@ -2300,19 +2300,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_retailprice (type: double) outputColumnNames: p_retailprice - Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 407 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(p_retailprice) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: llap LLAP IO: no inputs @@ -2320,41 +2320,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 
32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 31 Data size: 3317 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_size) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 31 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int) Execution mode: llap LLAP IO: no inputs @@ -2362,19 +2362,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int) outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -2385,10 +2385,10 @@ STAGE PLANS: aggregations: max(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: 
double) Reducer 2 Execution mode: llap @@ -2402,17 +2402,17 @@ STAGE PLANS: 1 _col0 (type: string) 2 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 57 Data size: 35406 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col5 >= CASE WHEN (_col12 is null) THEN (null) ELSE (_col11) END) (type: boolean) - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 19 Data size: 11802 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 3 Execution mode: llap @@ -2426,17 +2426,17 @@ STAGE PLANS: 1 2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col15 - Statistics: Num rows: 19 Data size: 12125 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col7 <= _col15) (type: boolean) - Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 627 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 3828 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 619 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2448,25 +2448,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: 
(sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -2475,16 +2475,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 107 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 15 Data size: 1575 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: boolean) Reducer 9 Execution mode: llap @@ -2493,22 +2493,22 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4165,14 +4165,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), 
p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4217,19 +4217,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int) outputColumnNames: p_partkey - Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 814 Data size: 3256 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(p_partkey) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -4237,19 +4237,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_name (type: string) outputColumnNames: p_name - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 2688 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_name) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4293,10 +4293,10 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 3301 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 
(type: string) Reducer 3 Execution mode: llap @@ -4308,17 +4308,17 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 5 Data size: 4226 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 3810 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not (_col1 like _col10)) (type: boolean) - Statistics: Num rows: 3 Data size: 2535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2286 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 3 Data size: 2535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2286 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 3 Data size: 2535 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2286 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 4 Execution mode: llap @@ -4330,12 +4330,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12 - Statistics: Num rows: 3 Data size: 2586 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1782 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 3 Data size: 2586 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1782 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint) Reducer 5 Execution mode: llap @@ -4347,17 +4347,17 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col11, _col12, _col14 - Statistics: Num rows: 14 Data size: 1787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1794 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((_col11 = 0) or (_col14 is null and _col3 is not null and (_col12 >= _col11))) (type: boolean) - Statistics: Num rows: 9 Data size: 1148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1794 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1734 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num 
rows: 9 Data size: 1148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1734 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -4369,22 +4369,22 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: complete outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col0) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 9 Execution mode: llap Reduce Operator Tree: @@ -4392,10 +4392,10 @@ STAGE PLANS: aggregations: min(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Stage: Stage-0 @@ -4469,14 +4469,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4484,21 +4484,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pp - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 16 Data size: 2992 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2992 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(p_name) keys: p_type (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -4543,20 +4543,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: pp - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: p_type is not null (type: boolean) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 32 Data size: 3296 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: p_type (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 32 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 10 @@ -4566,25 +4566,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 16 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 103 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 814 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2 Data size: 203 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 111 Basic stats: COMPLETE Column stats: COMPLETE Reducer 12 Execution mode: llap Reduce Operator Tree: @@ -4593,16 +4593,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: 
mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 287 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 8 Data size: 1628 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -4614,12 +4614,12 @@ STAGE PLANS: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2970 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 5 Data size: 3341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2970 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col9 (type: bigint), _col10 (type: bigint) Reducer 3 Execution mode: llap @@ -4631,19 +4631,19 @@ STAGE PLANS: 0 _col3 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 14 Data size: 1787 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2990 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) - Statistics: Num rows: 7 Data size: 893 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1794 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 7 Data size: 893 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1794 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: string) sort order: + Map-reduce partition columns: _col4 (type: string) - Statistics: Num rows: 7 Data size: 893 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 1794 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Reducer 4 Execution mode: llap @@ -4657,17 +4657,17 @@ STAGE PLANS: 1 _col0 (type: string) 2 
_col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col15, _col16 - Statistics: Num rows: 17 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 2298 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (not (_col1 like CASE WHEN (_col16 is null) THEN (null) ELSE (_col15) END)) (type: boolean) - Statistics: Num rows: 9 Data size: 1895 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1532 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 9 Data size: 1895 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 9 Data size: 1895 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 1156 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5479,16 +5479,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_null - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: string), _col4 (type: string) sort order: ++ Map-reduce partition columns: _col1 (type: string), _col4 (type: string) - Statistics: Num rows: 5 Data size: 3256 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: llap LLAP IO: no inputs @@ -5564,10 +5564,10 @@ STAGE PLANS: 0 _col1 (type: string), _col4 (type: string) 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 3581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 2890 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -5848,41 +5848,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -5920,25 +5920,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -5947,16 +5947,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 
Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Stage: Stage-0 @@ -6019,41 +6019,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: deptno (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -6091,25 +6091,25 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) 
(type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reducer 6 Execution mode: llap Reduce Operator Tree: @@ -6118,16 +6118,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: boolean) Stage: Stage-0 @@ -6189,21 +6189,21 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(name) keys: deptno (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6211,41 +6211,41 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: name (type: string) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Reduce 
Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: name is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(deptno) keys: name (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs @@ -6253,20 +6253,20 @@ STAGE PLANS: Map Operator Tree: TableScan alias: depts - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: deptno is not null (type: boolean) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: deptno (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 11 @@ -6277,16 +6277,16 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 2 Execution mode: llap @@ -6348,25 +6348,25 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - 
Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: llap Reduce Operator Tree: @@ -6375,16 +6375,16 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: boolean) Reducer 9 Execution mode: llap @@ -6393,25 +6393,25 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() keys: _col0 (type: int) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (sq_count_check(_col1) <= 1) (type: boolean) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
 COMPLETE

  Stage: Stage-0
    Fetch Operator
@@ -6477,19 +6477,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: depts
-                 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: name (type: string)
                    outputColumnNames: name
-                   Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(name)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -6497,41 +6497,41 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: depts
-                 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: name is not null (type: boolean)
-                   Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      keys: name (type: string)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Map 6
            Map Operator Tree:
                TableScan
                  alias: depts
-                 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: name is not null (type: boolean)
-                   Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 291 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: sum(deptno)
                      keys: name (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -6539,19 +6539,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: depts
-                 Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: deptno (type: int)
                    outputColumnNames: deptno
-                   Statistics: Num rows: 3 Data size: 31 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(deptno)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -6562,10 +6562,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  sort order: 
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: bigint)
        Reducer 2
            Execution mode: llap
@@ -6625,25 +6625,25 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  keys: _col0 (type: string)
                  mode: complete
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: (sq_count_check(_col1) <= 1) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: _col0 (type: string)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 7
            Execution mode: llap
            Reduce Operator Tree:
@@ -6652,16 +6652,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col1 (type: bigint), true (type: boolean), _col0 (type: string)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col2 (type: string)
                    sort order: +
                    Map-reduce partition columns: _col2 (type: string)
-                   Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint), _col1 (type: boolean)
        Reducer 9
            Execution mode: llap
@@ -6670,22 +6670,22 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: count()
                    mode: complete
                    outputColumnNames: _col0
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    Filter Operator
                      predicate: (sq_count_check(_col0) <= 1) (type: boolean)
-                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Select Operator
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          sort order: 
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE

  Stage: Stage-0
    Fetch Operator
diff --git a/ql/src/test/results/clientpositive/llap/subquery_select.q.out b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
index 8eaec9e..af70a3b 100644
--- a/ql/src/test/results/clientpositive/llap/subquery_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/subquery_select.q.out
@@ -2537,19 +2537,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: tnull
-                 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: i (type: int)
                    outputColumnNames: i
-                   Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(), count(i)
                      mode: hash
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint), _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
@@ -2557,17 +2557,17 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: tnull
-                 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    keys: i (type: int)
                    mode: hash
                    outputColumnNames: _col0
-                   Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -2580,12 +2580,12 @@ STAGE PLANS:
                  0 
                  1 
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 26 Data size: 546 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: int)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: int)
-                 Statistics: Num rows: 26 Data size: 546 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 26 Data size: 520 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -2597,14 +2597,14 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                outputColumnNames: _col0, _col1, _col2, _col4
-               Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 26 Data size: 624 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), CASE WHEN ((_col1 = 0)) THEN (false) WHEN (_col4 is not null) THEN (true) WHEN (_col0 is null) THEN (null) WHEN ((_col2 < _col1)) THEN (null) ELSE (false) END (type: boolean)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 28 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 26 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2616,10 +2616,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0), count(VALUE._col1)
                mode: mergepartial
                outputColumnNames: _col0, _col1
-               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  sort order: 
-                 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col0 (type: bigint), _col1 (type: bigint)
        Reducer 7
            Execution mode: llap
@@ -2628,16 +2628,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), true (type: boolean)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 1 Data size: 2 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: boolean)

  Stage: Stage-0
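Note: every hunk in this section traces to one cause: operators previously annotated "Column stats: NONE" are now costed with "Column stats: COMPLETE", so Num rows and Data size move from raw-file-size heuristics to values derived from per-column NDV and average length. A minimal sketch of how such plan output can be reproduced, assuming the column-stats autogather switch is what these golden files exercise, with an illustrative table name:

SET hive.stats.column.autogather=true;  -- assumed relevant setting
CREATE TABLE t_demo (i int);
INSERT INTO t_demo VALUES (1), (2);     -- the INSERT now also computes column stats for i
EXPLAIN SELECT count(i) FROM t_demo;    -- operators should report Column stats: COMPLETE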
diff --git a/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out b/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out
index c83da8b..bda0eb5 100644
--- a/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_bmj_schema_evolution.q.out
@@ -87,14 +87,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: test
-                 Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
-                     Statistics: Num rows: 1000 Data size: 60312 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1000 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -104,11 +104,11 @@ STAGE PLANS:
                        outputColumnNames: _col0, _col1
                        input vertices:
                          1 Map 3
-                       Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
-                         Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE
                          value expressions: _col1 (type: string)
            Execution mode: llap
            LLAP IO: some inputs
@@ -116,19 +116,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: test1
-                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -137,10 +137,10 @@ STAGE PLANS:
                Select Operator
                  expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 1100 Data size: 66343 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 2551 Data size: 242345 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out
index 786929e..6e6ba27 100644
--- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out
@@ -438,11 +438,15 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
@@ -461,8 +465,44 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.tmp_src_part
+                     Select Operator
+                       expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+                       outputColumnNames: c, d
+                       Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE
+                       Group By Operator
+                         aggregations: compute_stats(c, 16)
+                         keys: d (type: int)
+                         mode: hash
+                         outputColumnNames: _col0, _col1
+                         Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE
+                         Reduce Output Operator
+                           key expressions: _col0 (type: int)
+                           sort order: +
+                           Map-reduce partition columns: _col0 (type: int)
+                           Statistics: Num rows: 309 Data size: 2718 Basic stats: COMPLETE Column stats: NONE
+                           value expressions: _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col0 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 154 Data size: 1354 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-2
     Dependency Collection
@@ -482,6 +522,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c
+          Column Types: string
+          Table: default.tmp_src_part
+
 PREHOOK: query: INSERT INTO TABLE tmp_src_part PARTITION (d) SELECT * FROM tmp_src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tmp_src
@@ -857,6 +904,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1

@@ -864,6 +913,10 @@ STAGE PLANS:
   Stage: Stage-2
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+        Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1
            Map Operator Tree:
@@ -885,6 +938,19 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.even
+                     Select Operator
+                       expressions: _col0 (type: int), _col1 (type: string)
+                       outputColumnNames: c, d
+                       Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                       Group By Operator
+                         aggregations: compute_stats(c, 16), compute_stats(d, 16)
+                         mode: hash
+                         outputColumnNames: _col0, _col1
+                         Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                         Reduce Output Operator
+                           sort order: 
+                           Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                           value expressions: _col0 (type: struct), _col1 (type: struct)
                  Filter Operator
                    predicate: ((key % 2) = 1) (type: boolean)
                    Statistics: Num rows: 250 Data size: 44500 Basic stats: COMPLETE Column stats: COMPLETE
@@ -900,8 +966,51 @@ STAGE PLANS:
                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                            name: default.odd
+                     Select Operator
+                       expressions: _col0 (type: int), _col1 (type: string)
+                       outputColumnNames: c, d
+                       Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                       Group By Operator
+                         aggregations: compute_stats(c, 16), compute_stats(d, 16)
+                         mode: hash
+                         outputColumnNames: _col0, _col1
+                         Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                         Reduce Output Operator
+                           sort order: 
+                           Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                           value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-3
     Dependency Collection
@@ -919,6 +1028,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c, d
+          Column Types: int, string
+          Table: default.even
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c, d
+          Column Types: int, string
+          Table: default.odd
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out b/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out
index 133f50c..802140c 100644
--- a/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_fsstat.q.out
@@ -83,7 +83,7 @@ Database: default
 Table: tab_part
 #### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles	4
 	numRows	500
 	rawDataSize	5312
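Note: the tez_dml hunks above show the mechanics of the feature: each INSERT branch grows an extra Select Operator feeding a compute_stats(col, 16) Group By pipeline and a new "Column Stats Work" stage per target table, while the tez_fsstat hunk shows the metadata side, where COLUMN_STATS_ACCURATE now records per-column accuracy instead of only basic stats. A small sketch for inspecting that parameter (the partition spec is illustrative, not taken from the file):

DESCRIBE FORMATTED tab_part PARTITION (ds='2008-04-08');
-- Partition Parameters should then include:
--   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}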
diff --git a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out
index 92a188e..f436baa 100644
--- a/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_join_hash.q.out
@@ -58,19 +58,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: orc_src
-                 Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: string)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                       Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
        Reducer 2
@@ -82,15 +82,15 @@ STAGE PLANS:
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
-               Statistics: Num rows: 550 Data size: 47850 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1219 Data size: 9752 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -99,10 +99,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out b/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out
index cee5b9c..c6703f1 100644
--- a/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_nway_join.q.out
@@ -49,14 +49,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -68,15 +68,15 @@ STAGE PLANS:
                        input vertices:
                          1 Map 3
                          2 Map 4
-                       Statistics: Num rows: 101 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE
                        Group By Operator
                          aggregations: count()
                          mode: hash
                          outputColumnNames: _col0
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          Reduce Output Operator
                            sort order: 
-                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                            value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs
@@ -84,38 +84,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: c
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Reducer 2
@@ -125,10 +125,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -161,14 +161,14 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -178,7 +178,7 @@ STAGE PLANS:
                        outputColumnNames: _col0
                        input vertices:
                          1 Map 3
-                       Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 690 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
                        Map Join Operator
                          condition map:
                               Inner Join 0 to 1
@@ -187,15 +187,15 @@ STAGE PLANS:
                            1 _col0 (type: int)
                          input vertices:
                            1 Map 4
-                         Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE
                          Group By Operator
                            aggregations: count()
                            mode: hash
                            outputColumnNames: _col0
-                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                            Reduce Output Operator
                              sort order: 
-                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                              value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs
@@ -203,38 +203,38 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: c
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
                    predicate: key is not null (type: boolean)
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: key (type: int)
                      outputColumnNames: _col0
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Reducer 2
@@ -244,10 +244,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -302,48 +302,48 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Map 5
            Map Operator Tree:
                TableScan
                  alias: c
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Reducer 2
@@ -357,15 +357,15 @@ STAGE PLANS:
                  0 _col0 (type: int)
                  1 _col0 (type: int)
                  2 _col0 (type: int)
-               Statistics: Num rows: 101 Data size: 404 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: count()
                  mode: hash
                  outputColumnNames: _col0
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    sort order: 
-                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col0 (type: bigint)
        Reducer 3
            Execution mode: llap
@@ -374,10 +374,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -410,11 +410,11 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: a
-                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                    Map Join Operator
                      condition map:
                           Left Outer Join0 to 1
@@ -424,7 +424,7 @@ STAGE PLANS:
                      outputColumnNames: _col0
                      input vertices:
                        1 Map 3
-                     Statistics: Num rows: 50 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 690 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Left Outer Join0 to 1
@@ -433,15 +433,15 @@ STAGE PLANS:
                          1 _col0 (type: int)
                        input vertices:
                          1 Map 4
-                       Statistics: Num rows: 55 Data size: 222 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 15525 Data size: 124200 Basic stats: COMPLETE Column stats: COMPLETE
                        Group By Operator
                          aggregations: count()
                          mode: hash
                          outputColumnNames: _col0
-                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                          Reduce Output Operator
                            sort order: 
-                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                            value expressions: _col0 (type: bigint)
            Execution mode: llap
            LLAP IO: all inputs
@@ -449,32 +449,32 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: b
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Map 4
            Map Operator Tree:
                TableScan
                  alias: c
-                 Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: int)
                    outputColumnNames: _col0
-                   Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 45 Data size: 183 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 45 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: all inputs
        Reducer 2
@@ -484,10 +484,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
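Note: the tez_nway_join estimates are internally consistent with NDV-based join cardinality rather than the old row-count heuristic; the divisors below are inferred from the numbers, not stated in the file. The first two-way join of the 46-row and 45-row inputs is sized as 46 * 45 / 3 = 690 rows, and joining that result with the remaining 45-row input gives 690 * 45 / 2 = 15525 rows, matching the new Statistics lines. Under Column stats: NONE the same joins were guessed at close to input size (50, 55, or 101 rows).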
diff --git a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out
index 8ba9761..8d89ef8 100644
--- a/ql/src/test/results/clientpositive/llap/tez_self_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_self_join.q.out
@@ -38,7 +38,7 @@ POSTHOOK: query: insert into table tez_self_join2 values(1),(2),(3)
 POSTHOOK: type: QUERY
 POSTHOOK: Output: default@tez_self_join2
 POSTHOOK: Lineage: tez_self_join2.id1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
 PREHOOK: query: explain
 select s.id2, s.id3
 from
@@ -70,60 +70,60 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 3 <- Map 5 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 5 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
        Map 1
            Map Operator Tree:
                TableScan
-                 alias: self1
-                 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                 alias: tez_self_join2
+                 Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                   predicate: ((id2 = 'ab') and id1 is not null) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   predicate: id1 is not null (type: boolean)
+                   Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                     expressions: id1 (type: int), id3 (type: string)
-                     outputColumnNames: _col0, _col2
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     expressions: id1 (type: int)
+                     outputColumnNames: _col0
+                     Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                       sort order: 
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
-                       value expressions: _col0 (type: int), _col2 (type: string)
+                       key expressions: _col0 (type: int)
+                       sort order: +
+                       Map-reduce partition columns: _col0 (type: int)
+                       Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
-       Map 4
+       Map 3
            Map Operator Tree:
                TableScan
-                 alias: self2
-                 Statistics: Num rows: 3 Data size: 21 Basic stats: COMPLETE Column stats: NONE
+                 alias: self1
+                 Statistics: Num rows: 3 Data size: 528 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                   predicate: ('ab' = id3) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                   predicate: ((id2 = 'ab') and id1 is not null) (type: boolean)
+                   Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                     expressions: id1 (type: int), id3 (type: string)
+                     outputColumnNames: _col0, _col2
+                     Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order: 
-                       Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: COMPLETE
+                       value expressions: _col0 (type: int), _col2 (type: string)
            Execution mode: llap
            LLAP IO: no inputs
        Map 5
            Map Operator Tree:
                TableScan
-                 alias: tez_self_join2
-                 Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                 alias: self2
+                 Statistics: Num rows: 3 Data size: 258 Basic stats: COMPLETE Column stats: COMPLETE
                  Filter Operator
-                   predicate: id1 is not null (type: boolean)
-                   Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   predicate: ('ab' = id3) (type: boolean)
+                   Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
-                     expressions: id1 (type: int)
-                     outputColumnNames: _col0
-                     Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
-                       key expressions: _col0 (type: int)
-                       sort order: +
-                       Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                       sort order: 
+                       Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: llap
            LLAP IO: no inputs
        Reducer 2
@@ -133,42 +133,42 @@ STAGE PLANS:
               condition map:
                    Inner Join 0 to 1
               keys:
+                0 _col0 (type: int)
+                1 _col0 (type: int)
+              outputColumnNames: _col2
+              Statistics: Num rows: 1 Data size: 86 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: 'ab' (type: string), _col2 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Merge Join Operator
+               condition map:
+                    Inner Join 0 to 1
+               keys:
                  0 
                  1 
                outputColumnNames: _col0, _col2
-               Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: int), _col2 (type: string)
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int)
                    sort order: +
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 1 Data size: 15 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 90 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col1 (type: string)
-       Reducer 3
-           Execution mode: llap
-           Reduce Operator Tree:
-             Merge Join Operator
-               condition map:
-                    Inner Join 0 to 1
-               keys:
-                 0 _col0 (type: int)
-                 1 _col0 (type: int)
-               outputColumnNames: _col1
-               Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-               Select Operator
-                 expressions: 'ab' (type: string), _col1 (type: string)
-                 outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                 File Output Operator
-                   compressed: false
-                   Statistics: Num rows: 3 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                   table:
-                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
@@ -176,7 +176,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink

-Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[24][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product
 PREHOOK: query: select s.id2, s.id3
 from
 (
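Note: in tez_self_join the cross-product warning moves from Reducer 2 to Reducer 4 because the optimizer, now costing with complete column stats, reorders the join tree: the heavily filtered self1 x self2 cross product is evaluated first (Reducer 4, one estimated row) and that tiny result is then joined to tez_self_join2 on id1 (Reducer 2). Conceptually the new plan corresponds to a rewrite along these lines (illustrative SQL; the base table name for self1/self2 is assumed, not taken from the file):

SELECT 'ab', t.id3
FROM (SELECT s1.id1, s1.id3
      FROM tez_self_join1 s1, tez_self_join1 s2   -- filtered cross product, evaluated first
      WHERE s1.id2 = 'ab' AND s2.id3 = 'ab') t
JOIN tez_self_join2 j2 ON t.id1 = j2.id1;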
stats: COMPLETE + Map Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE + input vertices: + 1 Map 3 + Statistics: Num rows: 500 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: s3 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -173,10 +185,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -222,37 +234,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: 
boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -264,15 +276,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: llap @@ -281,10 +293,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,12 +307,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -367,37 +379,37 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: vt1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -406,12 +418,12 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -421,15 +433,15 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 4 Execution mode: llap @@ -438,10 +450,10 @@ STAGE PLANS: aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -513,36 +525,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data 
size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 4 
@@ -550,28 +562,28 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                 Merge Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
-                  Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count()
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
         Reducer 5 
             Execution mode: llap
@@ -580,10 +592,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -659,36 +671,36 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 5 
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -697,12 +709,12 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 3 
            Execution mode: llap
            Reduce Operator Tree:
@@ -712,15 +724,15 @@ STAGE PLANS:
                   keys:
                     0 _col0 (type: int)
                     1 _col0 (type: int)
-                  Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count()
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
         Reducer 4 
             Execution mode: llap
@@ -729,10 +741,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -743,12 +755,12 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
 
   Stage: Stage-0
     Fetch Operator
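Note: every hunk in the remaining files follows the same pattern: once column statistics are gathered automatically at insert time, downstream plans flip from "Column stats: NONE" to "Column stats: COMPLETE", and row counts and data sizes are re-derived from per-column NDVs and average widths instead of raw file size. A minimal sketch of the kind of query whose plan changes above -- the table names t1/t2 are taken from the plan aliases, while the hive.stats.column.autogather setting is an assumption about the feature this patch exercises, not quoted from the .q files --

    set hive.stats.column.autogather=true;
    explain
    select count(*) from t1 join t2 on (t1.key = t2.key);

With the flag on, each INSERT additionally compiles the compute_stats Group By pipeline and the trailing "Column Stats Work" stage seen throughout this diff.
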
diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out
index d9d2db4..730c396 100644
--- a/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_smb_empty.q.out
@@ -149,21 +149,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Merge Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -236,11 +236,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s1
-                  Statistics: Num rows: 242 Data size: 47094 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int), value (type: string), ds (type: string)
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 242 Data size: 44528 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 242 Data size: 67518 Basic stats: COMPLETE Column stats: COMPLETE
                     Merge Join Operator
                       condition map:
                            Left Outer Join0 to 1
@@ -248,10 +248,10 @@ STAGE PLANS:
                         0 _col0 (type: int)
                         1 _col0 (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                      Statistics: Num rows: 242 Data size: 139392 Basic stats: COMPLETE Column stats: PARTIAL
+                      Statistics: Num rows: 242 Data size: 137214 Basic stats: COMPLETE Column stats: PARTIAL
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 242 Data size: 139392 Basic stats: COMPLETE Column stats: PARTIAL
+                        Statistics: Num rows: 242 Data size: 137214 Basic stats: COMPLETE Column stats: PARTIAL
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -540,14 +540,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s2
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Map Operator Tree:
                 TableScan
                   alias: s3
@@ -562,14 +562,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Merge Join Operator
                         condition map:
                              Left Outer Join0 to 1
@@ -578,7 +578,7 @@ STAGE PLANS:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                           2 _col0 (type: int)
-                        Statistics: Num rows: 532 Data size: 9904 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -657,25 +657,25 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s3
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Map Operator Tree:
                 TableScan
                   alias: s1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Merge Join Operator
                         condition map:
                              Left Outer Join0 to 1
@@ -684,7 +684,7 @@ STAGE PLANS:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
                           2 _col0 (type: int)
-                        Statistics: Num rows: 532 Data size: 9904 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 532 Data size: 2129 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -841,14 +841,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s3
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Map Operator Tree:
                 TableScan
                   alias: s1
@@ -866,7 +866,7 @@ STAGE PLANS:
                         keys:
                           0 _col0 (type: int)
                           1 _col0 (type: int)
-                        Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 266 Data size: 1064 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
                           mode: hash
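Note: in tez_smb_main.q.out below, the better estimates do more than relabel statistics: several hunks show the Tez DAG being reshaped (vertices renumbered, a Merge Join moved out of a map vertex into a new reducer) because the optimizer now picks a different join strategy. A sketch of the query shape involved, reconstructed from the plan aliases a/b/c -- the table names tab, tab_part and src1 are an assumption about the .q file, not quoted from it:

    explain
    select count(*)
    from tab a
    join tab_part b on a.key = b.key
    join src1 c on a.value = c.value;
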
diff --git a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out
index b583bff..79e2d66 100644
--- a/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_smb_main.q.out
@@ -263,33 +263,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -298,15 +298,15 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -317,10 +317,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -374,33 +374,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: a
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                      outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 2 
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -409,15 +409,15 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           0 Map 1
-                        Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -428,10 +428,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -584,11 +584,12 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 2 <- Map 4 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 4 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
-        Map 2 
+        Map 1 
            Map Operator Tree:
                TableScan
                  alias: a
@@ -600,44 +601,23 @@ STAGE PLANS:
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
-            Map Operator Tree:
-                TableScan
-                  alias: b
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: key (type: int)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                      Merge Join Operator
+                      Map Join Operator
                         condition map:
                              Inner Join 0 to 1
                         keys:
-                          0 _col0 (type: int)
-                          1 _col0 (type: int)
-                        outputColumnNames: _col1
-                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Map Join Operator
-                          condition map:
-                               Inner Join 0 to 1
-                          keys:
-                            0 _col1 (type: string)
-                            1 _col0 (type: string)
-                          input vertices:
-                            1 Map 4
-                          Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: count()
-                            mode: hash
-                            outputColumnNames: _col0
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              sort order: 
-                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col0 (type: bigint)
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 4
+                        Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
+            LLAP IO: no inputs
         Map 4 
            Map Operator Tree:
                TableScan
@@ -657,6 +637,44 @@ STAGE PLANS:
                       Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: b
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: key is not null (type: boolean)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count()
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -1070,8 +1088,9 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 2 <- Map 1 (CUSTOM_EDGE), Map 4 (BROADCAST_EDGE)
-        Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
+        Map 1 <- Map 2 (BROADCAST_EDGE)
+        Map 3 <- Map 1 (CUSTOM_EDGE)
+        Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1086,15 +1105,43 @@ STAGE PLANS:
                      expressions: key (type: int), value (type: string)
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
+                      Map Join Operator
+                        condition map:
+                             Inner Join 0 to 1
+                        keys:
+                          0 _col1 (type: string)
+                          1 _col0 (type: string)
+                        outputColumnNames: _col0
+                        input vertices:
+                          1 Map 2
+                        Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 2 
+            Map Operator Tree:
+                TableScan
+                  alias: c
+                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: value is not null (type: boolean)
+                    Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: int)
+                        key expressions: _col0 (type: string)
                         sort order: +
-                        Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: string)
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 2 
+        Map 3 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -1112,50 +1159,21 @@ STAGE PLANS:
                        keys:
                          0 _col0 (type: int)
                          1 _col0 (type: int)
-                        outputColumnNames: _col1
                        input vertices:
                          0 Map 1
                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
-                        Map Join Operator
-                          condition map:
-                               Inner Join 0 to 1
-                          keys:
-                            0 _col1 (type: string)
-                            1 _col0 (type: string)
-                          input vertices:
-                            1 Map 4
-                          Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
-                          Group By Operator
-                            aggregations: count()
-                            mode: hash
-                            outputColumnNames: _col0
+                        Group By Operator
+                          aggregations: count()
+                          mode: hash
+                          outputColumnNames: _col0
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                            Reduce Output Operator
-                              sort order: 
-                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
-                              value expressions: _col0 (type: bigint)
-            Execution mode: llap
-            LLAP IO: no inputs
-        Map 4 
-            Map Operator Tree:
-                TableScan
-                  alias: c
-                  Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-                  Filter Operator
-                    predicate: value is not null (type: boolean)
-                    Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-                    Select Operator
-                      expressions: value (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Reducer 3 
+        Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
@@ -1224,14 +1242,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1241,7 +1259,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         input vertices:
                           1 Map 4
-                        Statistics: Num rows: 266 Data size: 4952 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                         Map Join Operator
                           condition map:
                                Inner Join 0 to 1
@@ -1250,15 +1268,15 @@ STAGE PLANS:
                             1 _col0 (type: int)
                           input vertices:
                             1 Map 6
-                          Statistics: Num rows: 558 Data size: 10399 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE
                           Group By Operator
                             aggregations: count()
                             mode: hash
                             outputColumnNames: _col0
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                             Reduce Output Operator
                               sort order: 
-                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                               value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1266,33 +1284,33 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: s3
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 5 
             Map Operator Tree:
                 TableScan
                   alias: s2
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1301,15 +1319,15 @@ STAGE PLANS:
                           1 _col0 (type: int)
                         input vertices:
                           1 Map 6
-                        Statistics: Num rows: 558 Data size: 10399 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1892 Data size: 15136 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1317,24 +1335,24 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: b
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: int)
-                        Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 3 
@@ -1344,10 +1362,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1397,36 +1415,36 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Map 3 
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: int), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: string)
                         sort order: ++
-                        Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -1435,19 +1453,19 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: int)
                 outputColumnNames: _col0
-                Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: int)
                   sort order: +
                   Map-reduce partition columns: _col0 (type: int)
-                  Statistics: Num rows: 242 Data size: 4502 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
         Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int)
                outputColumnNames: _col0
-                Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE
                Map Join Operator
                  condition map:
                       Inner Join 0 to 1
@@ -1456,15 +1474,15 @@ STAGE PLANS:
                     1 _col0 (type: int)
                   input vertices:
                     0 Reducer 2
-                  Statistics: Num rows: 550 Data size: 10243 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 617 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count()
                     mode: hash
                     outputColumnNames: _col0
-                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint)
         Reducer 5 
             Execution mode: llap
@@ -1473,10 +1491,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
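Note: for the dynamic-partition insert in the next file, the auto-gathered statistics are grouped by the partition column (compute_stats keyed on part1), so each written partition gets its own column stats, and the new Stage-4 "Column Stats Work" persists them for default.partunion1. The insert under test is quoted in the PREHOOK lines below; a completed sketch of it -- the second UNION branch is a guess for illustration, since the quoted text is truncated after the first branch --

    insert into table partunion1 partition(part1)
    select temps.* from (
      select 1 as id1, '2014' as part1 from dummy
      union all
      select 2 as id1, '2015' as part1 from dummy) temps;
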
diff --git a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
index 2ca78d7..f919b6a 100644
--- a/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_union_dynamic_partition.q.out
@@ -47,6 +47,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
@@ -54,7 +55,8 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
         Map 1 <- Union 2 (CONTAINS)
-        Map 3 <- Union 2 (CONTAINS)
+        Map 4 <- Union 2 (CONTAINS)
+        Reducer 3 <- Union 2 (SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -78,9 +80,25 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.partunion1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: id1, part1
+                      Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(id1, 16)
+                        keys: part1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: dummy
@@ -101,8 +119,44 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.partunion1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: id1, part1
+                      Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(id1, 16)
+                        keys: part1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 1 Data size: 564 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: struct)
             Execution mode: llap
             LLAP IO: no inputs
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 568 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 2 
            Vertex: Union 2
@@ -124,6 +178,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: id1
+          Column Types: int
+          Table: default.partunion1
+
 PREHOOK: query: insert into table partunion1 partition(part1)
 select temps.* from (
 select 1 as id1, '2014' as part1 from dummy
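Note: tez_union_multiinsert.q.out covers multi-insert queries over a UNION ALL source, so auto-gather adds one compute_stats pipeline per target table: one reducer plus Stage-6 "Column Stats Work" for default.dest1 (key, value) and another plus Stage-7 for default.dest2 (key, val1, val2). The general query shape, reconstructed from the plans (the table names dest1/dest2 and column names appear in the diff; the exact select lists and source are an assumption):

    from (select key, value from src
          union all
          select key, value from src) unionsrc
    insert overwrite table dest1
      select unionsrc.key, count(distinct unionsrc.value)
      group by unionsrc.key
    insert overwrite table dest2
      select unionsrc.key, unionsrc.value, count(distinct unionsrc.value)
      group by unionsrc.key, unionsrc.value;
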
diff --git a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
index 14e8e43..c7739e4 100644
--- a/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
+++ b/ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out
@@ -45,6 +45,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -53,11 +55,13 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 6 <- Union 3 (CONTAINS)
-        Map 7 <- Union 3 (CONTAINS)
+        Map 8 <- Union 3 (CONTAINS)
+        Map 9 <- Union 3 (CONTAINS)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
         Reducer 4 <- Union 3 (SIMPLE_EDGE)
-        Reducer 5 <- Union 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Union 3 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -78,7 +82,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 8 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -111,7 +115,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 7 
+        Map 9 
            Map Operator Tree:
                TableScan
                  alias: s0
@@ -199,10 +203,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 5 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
@@ -220,6 +252,34 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: key, val1, val2
+                  Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3 
            Vertex: Union 3
@@ -239,6 +299,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -952,6 +1026,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -961,10 +1037,12 @@ STAGE PLANS:
#### A masked pattern was here ####
      Edges:
         Map 1 <- Union 2 (CONTAINS)
-        Map 7 <- Union 2 (CONTAINS)
+        Map 9 <- Union 2 (CONTAINS)
         Reducer 3 <- Union 2 (SIMPLE_EDGE)
-        Reducer 4 <- Union 2 (SIMPLE_EDGE)
-        Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Union 2 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+        Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 2 (CONTAINS)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1000,7 +1078,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 5 
+        Map 7 
            Map Operator Tree:
                TableScan
                  alias: s1
@@ -1018,7 +1096,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 7 
+        Map 9 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -1072,10 +1150,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
@@ -1093,10 +1199,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: key, val1, val2
+                  Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
         Reducer 6 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -1146,6 +1280,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1855,6 +2003,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -1864,10 +2014,12 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 6 <- Union 3 (CONTAINS)
+        Map 8 <- Union 3 (CONTAINS)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
         Reducer 4 <- Union 3 (SIMPLE_EDGE)
-        Reducer 5 <- Union 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Union 3 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1903,7 +2055,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 8 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -1975,10 +2127,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
@@ -1996,10 +2176,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: key, val1, val2
+                  Statistics: Num rows: 1001 Data size: 456456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
         Reducer 6 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
@@ -2049,6 +2257,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -2750,6 +2972,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -2758,10 +2982,12 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 6 <- Union 3 (CONTAINS)
+        Map 8 <- Union 3 (CONTAINS)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
         Reducer 4 <- Union 3 (SIMPLE_EDGE)
-        Reducer 5 <- Union 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Union 3 (SIMPLE_EDGE)
+        Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -2782,7 +3008,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 8 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -2870,10 +3096,38 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 5 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(DISTINCT KEY._col2:0._col0)
                 keys: KEY._col0 (type: string), KEY._col1 (type: string)
                 mode: mergepartial
@@ -2891,6 +3145,34 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: key, val1, val2
+                  Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
        Union 3 
            Vertex: Union 3
@@ -2910,6 +3192,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -3605,6 +3901,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -3613,10 +3911,12 @@ STAGE PLANS:
     Tez
#### A masked pattern was here ####
      Edges:
-        Map 6 <- Union 3 (CONTAINS)
+        Map 8 <- Union 3 (CONTAINS)
         Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
         Reducer 4 <- Union 3 (SIMPLE_EDGE)
         Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+        Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -3637,7 +3937,7 @@ STAGE PLANS:
                         value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 8 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -3717,6 +4017,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                  outputColumnNames: key, val1, val2
+                  Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
         Reducer 5 
            Execution mode: llap
            Reduce Operator Tree:
@@ -3738,6 +4051,49 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960
Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3757,6 +4113,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/union4.q.out b/ql/src/test/results/clientpositive/llap/union4.q.out index b9ca42d..9429f1d 100644 --- a/ql/src/test/results/clientpositive/llap/union4.q.out +++ b/ql/src/test/results/clientpositive/llap/union4.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -30,7 +31,8 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +53,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -93,7 +95,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + 
File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -117,6 +147,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -136,6 +179,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/llap/union6.q.out b/ql/src/test/results/clientpositive/llap/union6.q.out index dca14c1..10c6de7 100644 --- a/ql/src/test/results/clientpositive/llap/union6.q.out +++ b/ql/src/test/results/clientpositive/llap/union6.q.out @@ -23,14 +23,16 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 4 <- Union 3 (CONTAINS) + Map 5 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -51,7 +53,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: s2 @@ -68,6 +70,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -90,6 +105,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 
(type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -109,6 +152,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out index b68a75a..9e7a37d 100644 --- a/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out +++ b/ql/src/test/results/clientpositive/llap/unionDistinct_1.q.out @@ -27,17 +27,19 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -58,10 +60,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 10 Map Operator Tree: TableScan - alias: s2 + alias: s3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -76,10 +78,10 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan - alias: s3 + alias: s2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE @@ -94,7 +96,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 10 + Reducer 11 Execution mode: 
llap Reduce Operator Tree: Group By Operator @@ -176,7 +178,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -219,6 +249,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION DISTINCT @@ -502,17 +539,19 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 9 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE), Union 5 (CONTAINS) Reducer 6 <- Union 5 (SIMPLE_EDGE) - Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -533,13 +572,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 7 + Map 10 Map Operator Tree: TableScan - alias: s2 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + alias: s3 + Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash @@ -551,13 +590,13 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 9 + Map 8 Map Operator Tree: TableScan - alias: s3 - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE + alias: s2 + 
Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - Statistics: Num rows: 1000 Data size: 10603 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) mode: hash @@ -569,7 +608,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 10 + Reducer 11 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -651,7 +690,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable12 - Reducer 8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -694,6 +761,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable12 + PREHOOK: query: insert overwrite table tmptable12 select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION DISTINCT @@ -2746,6 +2820,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2754,10 +2830,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2778,7 +2856,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -2858,6 +2936,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: 
string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 5 Execution mode: llap Reduce Operator Tree: @@ -2879,6 +2970,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 34000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -2898,6 +3032,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -3605,6 +3753,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -3613,9 +3763,11 @@ STAGE PLANS: Tez #### A masked pattern was 
here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 7 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -3636,7 +3788,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -3696,6 +3848,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest118 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 68000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE @@ -3707,6 +3872,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest218 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -3726,6 +3934,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest118 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest218 + Stage: Stage-1 Move Operator tables: @@ -4433,6 +4655,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -4441,9 +4665,11 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 7 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -4464,7 +4690,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -4533,6 +4759,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest119 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 205 Data size: 55760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -4545,6 +4784,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest219 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 114000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: 
COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -4564,6 +4846,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest119 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest219 + Stage: Stage-1 Move Operator tables: @@ -6499,6 +6795,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -6506,35 +6803,36 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 4 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: dst_union22_delta - Statistics: Num rows: 500 Data size: 20936 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 221500 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(k0) <= 50.0) (type: boolean) - Statistics: Num rows: 166 Data size: 6950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 73538 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k2 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 166 Data size: 6950 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348 Data size: 40548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: aaaa sort order: ++++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 348 Data size: 40548 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: 
true Execution mode: llap @@ -6550,7 +6848,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -6591,54 +6889,29 @@ STAGE PLANS: name: default.dst_union22_delta Truncated Path -> Alias: /dst_union22_delta/ds=1 [dst_union22_delta] - Map 4 + Map 5 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 103124 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 500 Data size: 181000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: (UDFToDouble(k1) > 20.0) (type: boolean) - Statistics: Num rows: 166 Data size: 30544 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k2 (type: string), ds (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 166 Data size: 30544 Basic stats: COMPLETE Column stats: PARTIAL - Map Join Operator - condition map: - Left Outer Join0 to 1 - filter mappings: - 0 [1, 1] - filter predicates: - 0 {(_col2 = '1')} - 1 - Estimated key counts: Map 5 => 55 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col4, _col5 - input vertices: - 1 Map 5 - Position of Big Table: 0 - Statistics: Num rows: 182 Data size: 33598 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 182 Data size: 33598 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 348 Data size: 40548 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - null sort order: aaaa - sort order: ++++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 348 Data size: 40548 Basic stats: COMPLETE Column stats: NONE - tag: -1 - auto parallelism: true + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 60092 Basic stats: COMPLETE Column stats: COMPLETE + tag: 0 + value expressions: _col1 (type: string), _col2 (type: string) + auto parallelism: true Execution mode: llap LLAP IO: no inputs Path -> Alias: @@ -6652,7 +6925,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -6693,26 +6966,26 @@ STAGE PLANS: name: default.dst_union22 Truncated Path -> Alias: /dst_union22/ds=1 [a] - Map 5 + Map 7 Map Operator Tree: TableScan alias: dst_union22_delta - Statistics: Num rows: 500 
Data size: 20936 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 176000 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: ((UDFToDouble(k0) > 50.0) and (UDFToDouble(k1) > 20.0)) (type: boolean) - Statistics: Num rows: 55 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: k1 (type: string), k3 (type: string), k4 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 55 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 55 Data size: 2302 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 55 Data size: 14575 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 value expressions: _col1 (type: string), _col2 (type: string) auto parallelism: true @@ -6729,7 +7002,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -6778,14 +7051,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 174 Data size: 20274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Statistics: Num rows: 174 Data size: 20274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -6809,23 +7082,113 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Union 2 - Vertex: Union 2 - - Stage: Stage-2 - Dependency Collection - - Stage: Stage-0 - Move Operator - tables: - partition: - ds 2 - replace: true -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 332 Data size: 146412 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2053 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 2053 Basic stats: COMPLETE Column stats: 
COMPLETE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: true + Reducer 4 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2005 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 6 + Execution mode: llap + Needs Tagging: false + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join0 to 1 + filter mappings: + 0 [1, 1] + filter predicates: + 0 {(VALUE._col1 = '1')} + 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: string), _col5 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 166 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + null sort order: aaaa + sort order: ++++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 332 Data size: 118192 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + auto parallelism: true + Union 2 + Vertex: Union 2 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 2 + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -6846,6 +7209,14 @@ STAGE PLANS: Stats-Aggr Operator #### A 
masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from ( @@ -9241,54 +9612,54 @@ STAGE PLANS: keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1000 Data size: 18624 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -9298,20 +9669,20 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + 
Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(1) keys: _col0 (type: string), _col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: bigint), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 375 Data size: 69750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 65472 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -9324,30 +9695,30 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 9312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 17835 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 4656 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 205 Data size: 35670 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 375 Data size: 66750 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 352 Data size: 62656 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 5 @@ -10854,6 +11225,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -10861,10 +11233,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 7 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 6 (CONTAINS) - Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 7 (CONTAINS) + Reducer 8 <- 
Union 2 (CONTAINS), Union 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -10888,7 +11261,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -10911,7 +11284,7 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 8 + Map 9 Map Operator Tree: TableScan alias: src @@ -10934,6 +11307,25 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -10954,7 +11346,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Reducer 5 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -10973,7 +11393,7 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -10995,29 +11415,10 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 89000 Basic 
stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 46500 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 - Union 6 - Vertex: Union 6 + Union 7 + Vertex: Union 7 Stage: Stage-2 Dependency Collection @@ -11035,6 +11436,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( select key, value from src @@ -11132,6 +11540,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -11139,10 +11548,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 4 <- Union 5 (CONTAINS) - Map 7 <- Union 5 (CONTAINS) + Map 5 <- Union 6 (CONTAINS) + Map 8 <- Union 6 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 6 <- Union 2 (CONTAINS), Union 5 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Union 2 (CONTAINS), Union 6 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -11166,7 +11576,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -11187,7 +11597,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -11228,7 +11638,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union29 - Reducer 6 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic 
stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11248,8 +11686,8 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 - Union 5 - Vertex: Union 5 + Union 6 + Vertex: Union 6 Stage: Stage-2 Dependency Collection @@ -11267,6 +11705,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union29 + PREHOOK: query: insert overwrite table union_subq_union29 select * from ( select key, value from src @@ -11693,6 +12138,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -11700,12 +12146,13 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 12 <- Union 4 (CONTAINS) - Reducer 11 <- Map 10 (SIMPLE_EDGE), Union 8 (CONTAINS) + Map 13 <- Union 4 (CONTAINS) + Reducer 10 <- Union 2 (CONTAINS), Union 9 (SIMPLE_EDGE) + Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 9 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE), Union 4 (CONTAINS) Reducer 5 <- Union 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 8 (CONTAINS) - Reducer 9 <- Union 2 (CONTAINS), Union 8 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 7 (SIMPLE_EDGE), Union 9 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -11729,7 +12176,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 10 + Map 11 Map Operator Tree: TableScan alias: src @@ -11752,7 +12199,7 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 12 + Map 13 Map Operator Tree: TableScan alias: src @@ -11773,7 +12220,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -11796,7 +12243,29 @@ STAGE PLANS: value expressions: _col2 (type: bigint) Execution mode: llap LLAP IO: no inputs - Reducer 11 + Reducer 10 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 12 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11853,7 +12322,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union30 - Reducer 7 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -11872,34 +12369,12 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 9 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 4 Vertex: Union 4 - Union 8 - Vertex: Union 8 + Union 9 + Vertex: Union 9 Stage: Stage-2 Dependency Collection @@ -11917,6 +12392,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union30 + PREHOOK: query: insert overwrite table union_subq_union30 select * from ( @@ -12064,6 +12546,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on 
stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -12073,9 +12557,11 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Map 1 <- Union 2 (CONTAINS) - Map 5 <- Union 2 (CONTAINS) + Map 7 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 3 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -12099,7 +12585,7 @@ STAGE PLANS: Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: no inputs - Map 5 + Map 7 Map Operator Tree: TableScan alias: t2 @@ -12150,6 +12636,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string) outputColumnNames: _col1 @@ -12170,6 +12669,21 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -12187,6 +12701,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 
Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 2 Vertex: Union 2 @@ -12206,6 +12748,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + Stage: Stage-1 Move Operator tables: @@ -12324,6 +12880,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -12335,7 +12893,9 @@ STAGE PLANS: Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Map 8 (SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -12361,7 +12921,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -12440,6 +13000,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -12458,10 +13031,53 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 14 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: 
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Execution mode: llap + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -12496,6 +13112,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 + Stage: Stage-1 Move Operator tables: @@ -12656,6 +13286,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -12664,10 +13296,12 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 6 <- Union 3 (CONTAINS) + Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -12693,7 +13327,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 8 Map Operator Tree: TableScan alias: t2 @@ -12773,6 +13407,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -12791,6 +13438,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 2 Data size: 9 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), 
compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -12810,6 +13500,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + Stage: Stage-1 Move Operator tables: @@ -13674,6 +14378,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -13682,7 +14387,8 @@ STAGE PLANS: Edges: Map 1 <- Union 2 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) - Reducer 5 <- Map 4 (SIMPLE_EDGE), Union 2 (CONTAINS) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -13709,7 +14415,7 @@ STAGE PLANS: Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -13748,7 +14454,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution 
mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -13790,6 +14524,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, value FROM src @@ -13846,15 +14587,17 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 6 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -13880,7 +14623,7 @@ STAGE PLANS: value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -13943,6 +14686,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 227 Data size: 61517 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -13962,6 +14733,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, cast(COUNT(*) as string) AS value FROM src @@ -14127,14 +14905,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_1 - 
Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -14144,78 +14922,78 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 4 - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 4 Map Operator Tree: TableScan alias: src10_2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 5 Map Operator Tree: TableScan alias: src10_3 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ 
Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 8 Map Operator Tree: TableScan alias: src10_4 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -14225,10 +15003,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14240,17 +15018,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 Union 6 @@ -14328,19 +15106,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -14348,61 +15126,61 @@ STAGE PLANS: Map Operator Tree: TableScan alias: src10_2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 6 Map Operator Tree: TableScan alias: src10_3 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 9 Map Operator Tree: TableScan alias: src10_4 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 20 Data size: 208 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -14415,17 +15193,17 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 114 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 2492 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -14433,10 +15211,10 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 103 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14448,17 +15226,17 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: string), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 21 Data size: 218 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 Union 7 @@ -14535,6 +15313,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -14543,7 +15322,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 5 <- 
Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -14564,7 +15344,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -14624,7 +15404,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Execution mode: llap Reduce Operator Tree: Group By Operator @@ -14665,6 +15473,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION DISTINCT @@ -14876,15 +15691,17 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: - Map 5 <- Union 3 (CONTAINS) + Map 6 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -14905,7 +15722,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: llap LLAP IO: no inputs - Map 5 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -14964,6 +15781,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 13 Data size: 3536 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data 
size: 984 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Union 3 Vertex: Union 3 @@ -14983,6 +15828,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION DISTINCT diff --git a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out index b13c0ee..8b99d17 100644 --- a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out @@ -294,7 +294,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 4 numRows 20 rawDataSize 4552 @@ -627,7 +627,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"cbigint\":\"true\",\"cboolean1\":\"true\",\"cboolean2\":\"true\",\"cdouble\":\"true\",\"cfloat\":\"true\",\"cint\":\"true\",\"csmallint\":\"true\",\"cstring1\":\"true\",\"cstring2\":\"true\",\"ctimestamp1\":\"true\",\"ctimestamp2\":\"true\",\"ctinyint\":\"true\"}} numFiles 2 numRows 20 rawDataSize 4389 diff --git a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out index 67fef54..583717a 100644 --- a/ql/src/test/results/clientpositive/llap/union_remove_26.q.out +++ b/ql/src/test/results/clientpositive/llap/union_remove_26.q.out @@ -135,128 +135,12 @@ POSTHOOK: query: explain SELECT count(1) as rowcnt, min(val) as ms, max(val) as mx from inputTbl3 POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-1 depends on stages: Stage-2 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-2 - Tez -#### A masked pattern was here #### - Edges: - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: inputtbl1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: val (type: int) - 
outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 3 - Map Operator Tree: - TableScan - alias: inputtbl2 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: val (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Map 5 - Map Operator Tree: - TableScan - alias: inputtbl3 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: val (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(1), min(_col1), max(_col1) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int) - Execution mode: llap - LLAP IO: no inputs - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), min(VALUE._col1), max(VALUE._col2) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2 - 
Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-1 + Stage: Stage-0 Fetch Operator - limit: -1 + limit: 3 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/llap/union_stats.q.out b/ql/src/test/results/clientpositive/llap/union_stats.q.out index 1f3dc82..69b25a9 100644 --- a/ql/src/test/results/clientpositive/llap/union_stats.q.out +++ b/ql/src/test/results/clientpositive/llap/union_stats.q.out @@ -425,7 +425,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 @@ -460,7 +460,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1000 rawDataSize 10624 diff --git a/ql/src/test/results/clientpositive/llap/union_top_level.q.out b/ql/src/test/results/clientpositive/llap/union_top_level.q.out index 52926b6..10f4b1f 100644 --- a/ql/src/test/results/clientpositive/llap/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/llap/union_top_level.q.out @@ -664,6 +664,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -671,8 +672,9 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -693,11 +695,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -715,11 +716,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -737,7 +737,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -763,7 +762,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + 
expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -785,7 +812,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -807,6 +847,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -826,6 +879,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all @@ -884,6 +944,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -891,8 +952,9 @@ STAGE PLANS: #### A masked 
pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 4 <- Union 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -913,11 +975,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -935,11 +996,10 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -957,7 +1017,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 261 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Execution mode: llap LLAP IO: no inputs @@ -983,7 +1042,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1005,7 +1092,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE 
Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 8 Execution mode: llap Reduce Operator Tree: Select Operator @@ -1027,6 +1127,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 819 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Union 3 Vertex: Union 3 @@ -1046,6 +1159,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all diff --git a/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out b/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out index 5b17144..ea1913d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_adaptor_usage_mode.q.out @@ -125,17 +125,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -203,17 +203,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data 
size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -281,17 +281,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -359,17 +359,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -437,7 +437,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3] @@ -449,19 +449,19 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 8] selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 4:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 
6:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 7:string) -> 8:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -531,7 +531,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3] @@ -543,19 +543,19 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 8] selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 4:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 5:string, StringGroupColEqualStringGroupColumn(col 6, col 7)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 6:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 7:string) -> 8:boolean - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -615,14 +615,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -719,17 +719,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 
Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -787,14 +787,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -891,17 +891,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key = 10) (type: boolean) - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -964,22 +964,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 452 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string), CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -1011,13 +1011,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1068,7 +1068,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: count_case_groupby - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1080,7 +1080,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 3] selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1) Group By Vectorization: @@ -1093,7 +1093,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -1102,7 +1102,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: 
all inputs @@ -1136,13 +1136,13 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out b/ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index f633594..fcaab34 100644 --- a/ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -52,12 +52,12 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_4] Group By Operator [GBY_3] (rows=1 width=188) Output:["_col0","_col1"],aggregations:["max(dt)","max(greg_dt)"] - Select Operator [SEL_2] (rows=3 width=102) + Select Operator [SEL_2] (rows=1 width=102) Output:["dt","greg_dt"] - Filter Operator [FIL_8] (rows=3 width=102) + Filter Operator [FIL_8] (rows=1 width=102) predicate:(id = 5) TableScan [TS_0] (rows=7 width=102) - default@testvec,testvec,Tbl:COMPLETE,Col:NONE,Output:["id","dt","greg_dt"] + default@testvec,testvec,Tbl:COMPLETE,Col:COMPLETE,Output:["id","dt","greg_dt"] PREHOOK: query: select max(dt), max(greg_dt) from testvec where id=5 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index a0ac248..6f3df14 100644 --- a/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -68,32 +68,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 
(type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -102,10 +102,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -128,13 +128,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -202,25 +202,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -228,7 +228,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: vectorOutput: false @@ -237,12 +237,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key 
expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap Reducer 2 Execution mode: vectorized, llap @@ -264,13 +264,13 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Select Operator Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumns: [] - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -281,14 +281,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -310,13 +310,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,25 +409,25 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -435,7 +435,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -445,37 +445,37 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Map 5 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -483,7 +483,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -493,12 +493,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column 
stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 @@ -523,7 +523,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -532,7 +532,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Reducer 3 Execution mode: llap @@ -544,14 +544,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 5 Data size: 511 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -578,7 +578,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -587,7 +587,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Stage: Stage-0 @@ -674,32 +674,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: 
Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -708,10 +708,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -734,13 +734,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -815,32 +815,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: 
key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -849,10 +849,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -875,13 +875,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -980,32 +980,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1014,10 +1014,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash 
outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1040,13 +1040,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1133,32 +1133,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 8) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1167,10 +1167,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1193,13 +1193,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1265,7 +1265,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1275,7 +1275,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 @@ -1284,7 +1284,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2] selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1293,7 +1293,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1308,7 +1308,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1318,7 +1318,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + 1) (type: int) outputColumnNames: _col0 @@ -1327,7 +1327,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2] selectExpressions: LongColAddLongScalar(col 0, val 1) -> 2:long - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1336,7 +1336,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1356,7 +1356,7 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 128 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1365,10 +1365,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1390,13 +1390,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1457,32 +1457,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 3 Data size: 306 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By 
Vectorization: @@ -1491,10 +1491,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1517,13 +1517,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1590,36 +1590,36 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key < 6) (type: boolean) - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1628,7 +1628,7 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - Statistics: Num rows: 6 Data size: 613 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: 
COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1637,10 +1637,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: llap Reducer 2 @@ -1663,13 +1663,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1752,32 +1752,32 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key < 8) and (key < 6)) (type: boolean) - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 93 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) - Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -1786,10 +1786,10 @@ STAGE PLANS: projectedOutputColumns: null mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) 
Execution mode: llap Reducer 2 @@ -1812,13 +1812,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1900,6 +1900,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -1907,30 +1909,34 @@ STAGE PLANS: Stage: Stage-2 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -1938,32 +1944,106 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 
Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 16 Data size: 2912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Execution mode: llap + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function 
expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -1981,6 +2061,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -2118,6 +2212,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -2126,32 +2222,34 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Map Operator Tree: TableScan alias: a - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 930 Basic stats: COMPLETE Column stats: COMPLETE Merge Join Operator condition map: Inner Join 0 to 1 @@ -2159,19 +2257,36 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 11 Data size: 1023 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 16 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -2181,56 +2296,93 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 1023 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) - Group By Vectorization: - aggregators: VectorUDAFCountMerge(col 1) -> bigint - className: VectorGroupByOperator - vectorOutput: true - keyExpressions: 
col 0 - native: false - projectedOutputColumns: [0] keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [0, 1] - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 5 Data size: 465 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -2248,6 +2400,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index 40251b4..d1d72be 100644 --- a/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -124,7 +124,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 100 Data 
size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -134,7 +134,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -142,7 +142,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -156,7 +156,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 3 - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10000 Data size: 6820000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21) (type: int) outputColumnNames: _col0 @@ -165,7 +165,7 @@ STAGE PLANS: native: true projectedOutputColumns: [21] selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7,_col8,_col9,_col10,_col11,_col12,_col13,_col14,_col15,_col16,_col17,_col18,_col19,_col20,_col21)) -> 21:int - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10000 Data size: 6820000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) Group By Vectorization: @@ -176,14 +176,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -199,7 +199,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: 
COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -209,7 +209,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 10) -> boolean predicate: bin is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -217,7 +217,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col10 (type: binary) sort order: + @@ -226,7 +226,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 34084 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -258,13 +258,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,7 +317,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: hundredorc - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -328,7 +328,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() Group By Vectorization: @@ -341,7 +341,7 @@ STAGE PLANS: keys: bin (type: binary) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: binary) sort order: + @@ -350,7 +350,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 50 Data size: 3200 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -384,7 +384,7 @@ STAGE PLANS: keys: KEY._col0 (type: binary) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col0 (type: binary) outputColumnNames: _col0, _col1 @@ -392,13 +392,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 14819 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -476,7 +476,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -486,7 +486,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 @@ -494,7 +494,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -508,7 +508,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: binary), _col3 (type: binary) outputColumnNames: _col0, _col1, _col2 @@ -516,13 +516,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10, 11] - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 110 Data size: 32601 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 144 Data size: 16704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -541,7 +541,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -551,7 +551,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2) -> boolean predicate: i is not null (type: boolean) - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i (type: int), bin (type: binary) outputColumnNames: _col0, _col1 @@ -559,7 +559,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 10] - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -568,7 +568,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 100 Data size: 29638 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 100 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: binary) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_bround.q.out b/ql/src/test/results/clientpositive/llap/vector_bround.q.out index 5d1990a..c32521b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bround.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bround.q.out @@ -47,7 +47,7 @@ Stage-0 Select Operator [SEL_1] (rows=8 width=16) Output:["_col0","_col1"] TableScan [TS_0] (rows=8 width=16) - default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:NONE,Output:["v0","v1"] + default@test_vector_bround,test_vector_bround,Tbl:COMPLETE,Col:COMPLETE,Output:["v0","v1"] PREHOOK: query: select bround(v0), bround(v1, 1) from test_vector_bround PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out index 7b57223..f793127 100644 --- a/ql/src/test/results/clientpositive/llap/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_bucket.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -28,6 +29,7 @@ STAGE PLANS: #### A masked pattern was here #### Edges: Reducer 2 <- 
Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -66,35 +68,58 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: Select Operator expressions: UDFToInteger(VALUE._col0) (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [2, 1] - selectExpressions: CastStringToLong(col 0) -> 2:int Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -112,6 +137,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY PREHOOK: Input: default@non_orc_table diff --git a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out index f06d49a..e5a5c7d 100644 --- 
a/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_cast_constant.q.out @@ -130,7 +130,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -141,7 +141,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 4196 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(50), avg(50.0), avg(50) Group By Vectorization: @@ -155,12 +155,12 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 59584 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 59584 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Execution mode: vectorized, llap @@ -186,11 +186,11 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: decimal(14,4)) Reducer 3 @@ -210,19 +210,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 524 Data size: 155436 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 133 Data size: 17556 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 2960 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out index c330097..41d92a2 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vector_char_2.q.out @@ -80,7 +80,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -92,7 +92,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2] selectExpressions: CastStringToLong(col 0) -> 2:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -105,7 +105,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + @@ -115,7 +115,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap @@ -150,7 +150,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: + @@ -159,7 +159,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 @@ -179,19 +179,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -278,7 +278,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: char_2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -290,7 +290,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2] selectExpressions: CastStringToLong(col 0) -> 2:int - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count() Group By Vectorization: @@ -303,7 +303,7 @@ STAGE PLANS: keys: _col0 (type: char(20)) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - @@ -313,7 +313,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 500 Data size: 99000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized, llap @@ -348,7 +348,7 @@ STAGE PLANS: keys: KEY._col0 (type: char(20)) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: char(20)) sort order: - @@ -357,7 +357,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 3 @@ -377,19 +377,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 250 Data size: 49500 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 214 Data size: 22898 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 5 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 5 Data size: 990 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 5 Data size: 535 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out index d164ebe..4f91cf0 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_4.q.out @@ -136,50 +136,74 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Execution 
mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -197,3 +221,10 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out index d3ab3f2..5a3994f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out @@ -233,12 +233,14 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -251,10 +253,9 @@ STAGE PLANS: className: VectorLimitOperator native: true Reduce Sink Vectorization: - className: VectorReduceSinkOperator - native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: No TopN IS false + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -262,32 +263,25 @@ STAGE PLANS: enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false 
+ allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Execution mode: vectorized, llap + Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - groupByVectorOutput: true - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false Reduce Operator Tree: - Select Vectorization: - className: VectorSelectOperator - native: true - Limit Vectorization: - className: VectorLimitOperator - native: true - Select Vectorization: - className: VectorSelectOperator - native: true - selectExpressions: CastLongToChar(col 0, maxLength 12) -> 1:Char - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Stage: Stage-2 @@ -295,6 +289,8 @@ STAGE PLANS: Stage: Stage-3 + Stage: Stage-4 + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out index c912b2e..f762f7b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_2.q.out @@ -46,11 +46,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: str2 (type: string), UDFToInteger(COALESCE(str1,0)) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -60,12 +60,12 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: llap LLAP IO: all inputs @@ -81,14 +81,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,14 +141,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: COALESCE(str1,0) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -208,7 +208,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -220,7 +220,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 4] selectExpressions: CastStringToLong(col 3)(children: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string) -> 4:int - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 595 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) Group By Vectorization: @@ -233,7 +233,7 @@ STAGE PLANS: keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + @@ -242,7 +242,7 @@ STAGE PLANS: className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -276,7 +276,7 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), round((UDFToDouble(_col1) / 60.0), 2) (type: double) outputColumnNames: _col0, _col1 @@ -285,13 +285,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 2] selectExpressions: 
RoundWithNumDigitsDoubleToDouble(col 3, decimalPlaces 2)(children: DoubleColDivideDoubleScalar(col 2, val 60.0)(children: CastLongToDouble(col 1) -> 2:double) -> 3:double) -> 2:double - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -344,7 +344,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: str_str_orc - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -356,13 +356,13 @@ STAGE PLANS: native: true projectedOutputColumns: [3] selectExpressions: VectorCoalesce(columns [0, 2])(children: col 0, ConstantVectorExpression(val 0) -> 2:string) -> 3:string - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 510 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 5eb4035..d649b51 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -53,11 +53,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: m - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -67,14 +67,14 @@ STAGE PLANS: outputColumnNames: _col0, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: bigint), CASE WHEN ((COALESCE(_col2,5) > 1)) THEN (_col2) ELSE (null) END (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -85,16 +85,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: n - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: member (type: bigint), attr (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 5ea4b0f..f0e1b7f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -183,10 +183,10 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2b - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: a is not null (type: boolean) - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 diff --git a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out index 3c03787..fd4e33b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_count_distinct.q.out @@ -1252,7 +1252,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: web_sales - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] @@ -1263,7 +1263,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [16] - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2000 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -1274,7 +1274,7 @@ STAGE PLANS: keys: ws_order_number (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -1283,7 +1283,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 2000 Data size: 3520000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -1314,7 +1314,7 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 1760000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 90 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0) Group By Vectorization: @@ -1325,14 +1325,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Reducer 3 Execution mode: vectorized, llap @@ -1354,13 +1354,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_data_types.q.out b/ql/src/test/results/clientpositive/llap/vector_data_types.q.out index f67bc25..40d7f40 100644 --- a/ql/src/test/results/clientpositive/llap/vector_data_types.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_data_types.q.out @@ -119,15 +119,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: t (type: tinyint), si (type: smallint), i (type: int), b (type: bigint), f (type: float), d (type: double), bo (type: boolean), s (type: string), ts (type: timestamp), dec (type: decimal(4,2)), bin (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1049 
Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: llap @@ -138,13 +138,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: boolean), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: decimal(4,2)), VALUE._col7 (type: binary) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -219,7 +219,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -230,7 +230,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int) sort order: +++ @@ -239,7 +239,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: boolean), _col7 (type: string), _col8 (type: timestamp), _col9 (type: decimal(4,2)), _col10 (type: binary) Execution mode: vectorized, llap @@ -269,19 +269,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 357693 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit 
Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 6820 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index b3ff557..c9e371a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -54,15 +54,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -71,10 +71,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -117,15 +117,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -134,10 +134,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: 
Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -180,15 +180,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -197,10 +197,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -243,15 +243,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -260,10 +260,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -306,15 +306,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -323,10 +323,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -369,15 +369,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -386,10 +386,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -432,15 +432,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -449,10 +449,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic 
stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -495,15 +495,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -512,10 +512,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,15 +558,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_1 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( t AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp) sort order: + - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index 144356c..8c9dd7b 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -43,15 +43,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -60,10 +60,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -106,15 +106,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -123,10 +123,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -169,15 +169,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data 
size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -186,10 +186,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -232,15 +232,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -249,10 +249,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -295,15 +295,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -312,10 +312,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -358,15 +358,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -375,10 +375,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -421,15 +421,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -438,10 +438,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -484,15 +484,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator 
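The recurring NONE → COMPLETE flips in these hunks mean the planner now sees per-column statistics, so each operator's Data size is re-derived from the projected columns' estimated widths — 8 bytes for a projected bigint or double, 4 for the int family and boolean, and an average-length-based figure such as 184 for a string — instead of the old raw-file heuristic. A minimal sketch of producing the same annotation by hand, assuming (as the UDFToString/UDFToDouble casts above suggest) a table decimal_2 whose only column t is a decimal:

    -- gather basic and per-column statistics so that EXPLAIN
    -- annotates operators with "Column stats: COMPLETE"
    ANALYZE TABLE decimal_2 COMPUTE STATISTICS;
    ANALYZE TABLE decimal_2 COMPUTE STATISTICS FOR COLUMNS;

    -- UDFToString(t) in the plan corresponds to an explicit cast:
    EXPLAIN SELECT CAST(t AS string) FROM decimal_2 ORDER BY 1;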
key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -501,10 +501,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -558,15 +558,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(t) (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -575,10 +575,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -621,15 +621,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToByte(t) (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: tinyint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -638,10 +638,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: tinyint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -684,15 +684,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToShort(t) (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: smallint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -701,10 +701,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: smallint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -747,15 +747,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToInteger(t) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -764,10 +764,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -810,15 +810,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToLong(t) (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -827,10 +827,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -873,15 +873,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToFloat(t) (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -890,10 +890,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -936,15 +936,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToDouble(t) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -953,10 +953,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: double) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE 
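Relatedly, the new row-mode Reducer stages earlier in this diff ("notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported") belong to the column-stats autogather pipeline that Hive appends to INSERT statements: only the compute_stats aggregation falls back to non-vectorized execution, while the rest of the query stays vectorized. A hedged sketch of reproducing that plan shape, assuming hive.stats.column.autogather is the setting enabled in these tests:

    -- with autogather on, EXPLAIN shows an extra column-stats stage and a
    -- compute_stats Group By feeding it (query taken verbatim from this diff)
    SET hive.stats.column.autogather=true;
    EXPLAIN INSERT INTO TABLE char_3 SELECT cint FROM alltypesorc LIMIT 10;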
File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -999,15 +999,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_2 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToString(t) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -1016,10 +1016,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index 08aca71..7e352db 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -108,7 +108,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,2)) outputColumnNames: _col0 @@ -116,7 +116,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -132,13 +132,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 2 - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 1153 Data size: 129236 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1126 Data size: 252224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -157,7 +157,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -167,7 +167,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0) -> boolean predicate: dec is not null (type: boolean) - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(4,0)) outputColumnNames: _col0 @@ -175,7 +175,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(6,2)) sort order: + @@ -184,7 +184,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 117488 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 5d62086..6df5053 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -569,7 +569,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_precision - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -580,7 +580,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 75 Data size: 3472 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 75 Data size: 3584 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(dec), sum(dec) Group By Vectorization: @@ -592,10 +592,10 @@ STAGE PLANS: vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgDecimal(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: 
NONE + Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -619,10 +619,10 @@ STAGE PLANS: aggregations: avg(VALUE._col0), sum(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 8bd80cf..0947802 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -54,7 +54,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -66,7 +66,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -74,7 +74,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -103,13 +103,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -156,7 +156,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_txt - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ 
-167,7 +167,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + @@ -176,7 +176,7 @@ STAGE PLANS: keyExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: no inputs @@ -205,13 +205,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -284,15 +284,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_rc - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dec (type: decimal(10,0)), round(dec, -1) (type: decimal(11,0)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: llap LLAP IO: no inputs @@ -317,13 +317,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -370,15 +370,15 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_rc - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: dec (type: decimal(10,0)) outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: llap LLAP IO: no inputs @@ -403,13 +403,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -482,7 +482,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -494,7 +494,7 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(10,0)) sort order: + @@ -502,7 +502,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -531,13 +531,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -584,7 +584,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 
Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -595,7 +595,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: round(_col0, -1) (type: decimal(11,0)) sort order: + @@ -604,7 +604,7 @@ STAGE PLANS: keyExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 1:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -633,13 +633,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 0] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index 6083a10..78b1a6e 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -58,7 +58,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_1_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -70,7 +70,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 3:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 4:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 5:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 11:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 13:decimal(21,0) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -78,7 +78,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -107,13 +107,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,7 +209,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_2_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -221,7 +221,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] selectExpressions: FuncRoundDecimalToDecimal(col 0) -> 2:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 4:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 5:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 6:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 7:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 11:decimal(21,0), FuncRoundDecimalToDecimal(col 1) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 0) -> 13:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 1) -> 14:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 2) -> 15:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 3) -> 16:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 4) -> 17:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -1) -> 18:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -2) -> 19:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -3) -> 20:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces -4) -> 21:decimal(21,0) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -229,7 +229,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -258,13 +258,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19] - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -387,7 +387,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_3_orc - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -399,7 +399,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -15) -> 1:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -16) -> 2:decimal(21,0), 
FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -13) -> 3:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -14) -> 4:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -11) -> 5:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -12) -> 6:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -9) -> 7:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -10) -> 8:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -7) -> 9:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -8) -> 10:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -5) -> 11:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -6) -> 12:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -3) -> 13:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -4) -> 14:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -1) -> 15:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces -2) -> 16:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 0) -> 17:decimal(21,0), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 1) -> 18:decimal(22,1), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 2) -> 19:decimal(23,2), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 3) -> 20:decimal(24,3), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 4) -> 21:decimal(25,4), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 5) -> 22:decimal(26,5), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 6) -> 23:decimal(27,6), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 7) -> 24:decimal(28,7), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 8) -> 25:decimal(29,8), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 26:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 10) -> 27:decimal(31,10), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 11) -> 28:decimal(32,11), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 12) -> 29:decimal(33,12), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 13) -> 30:decimal(34,13), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 14) -> 31:decimal(35,14), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 15) -> 32:decimal(36,15), FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 16) -> 33:decimal(37,16) - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(21,0)) sort order: + @@ -407,7 +407,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), 
_col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col30 (type: decimal(35,14)), _col31 (type: decimal(36,15)), _col32 (type: decimal(37,16)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -436,13 +436,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32] - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -554,7 +554,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_tbl_4_orc - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -566,7 +566,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3] selectExpressions: FuncRoundWithNumDigitsDecimalToDecimal(col 0, decimalPlaces 9) -> 2:decimal(30,9), FuncRoundWithNumDigitsDecimalToDecimal(col 1, decimalPlaces 9) -> 3:decimal(30,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: decimal(30,9)) sort order: + @@ -574,7 +574,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -604,13 +604,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 3] selectExpressions: ConstantVectorExpression(val 1809242.315111134) -> 2:decimal(17,9), ConstantVectorExpression(val -1809242.315111134) -> 3:decimal(17,9) - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE File Output
Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 867126f..99c7913 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -65,14 +65,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + key) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -149,14 +149,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key + CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -233,14 +233,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) + (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,14 +317,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE 
+ Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) + 1.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -401,14 +401,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key - key) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -485,14 +485,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key - CAST( value AS decimal(10,0))) (type: decimal(21,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -569,14 +569,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) - (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -653,14 +653,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num 
rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) - 1.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -737,14 +737,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key * key) (type: decimal(38,17)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -821,17 +821,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((key * CAST( value AS decimal(10,0))) > 0) (type: boolean) - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12 Data size: 1356 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 1392 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -893,14 +893,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key * CAST( value AS decimal(10,0))) (type: decimal(31,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -977,14 +977,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) * (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1061,14 +1061,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) * 2.0) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,17 +1145,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / 0) (type: decimal(22,12)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1195,17 +1195,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / null) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 
Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 113 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1245,17 +1245,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (key <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / key) (type: decimal(38,18)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1328,17 +1328,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (key / CAST( value AS decimal(10,0))) (type: decimal(31,21)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1401,17 +1401,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (value <> 0) (type: boolean) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (UDFToDouble(key) / (UDFToDouble(value) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 
Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1474,14 +1474,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (1.0 + (UDFToDouble(key) / 2.0)) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1558,14 +1558,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: abs(key) (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1646,22 +1646,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) outputColumnNames: key, value - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(key), count(key), avg(key) keys: value (type: int) mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 5768 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 5768 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1673,15 +1673,15 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 3304 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: _col0 (type: int), (_col1 / CAST( _col2 AS decimal(19,0))) (type: decimal(38,18)), _col3 (type: decimal(24,14)), _col1 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reducer 3 Execution mode: vectorized, llap @@ -1689,10 +1689,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: decimal(38,18)), VALUE._col1 (type: decimal(24,14)), VALUE._col2 (type: decimal(30,10)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 14 Data size: 4760 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1746,14 +1746,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: (- key) (type: decimal(20,10)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1895,14 +1895,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ceil(key) (type: decimal(11,0)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1979,14 +1979,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator 
expressions: floor(key) (type: decimal(11,0)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2063,14 +2063,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(key, 2) (type: decimal(13,2)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2147,14 +2147,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: power(key, 2) (type: double) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 304 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2231,14 +2231,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ((key + 1) % (key / 2)) (type: decimal(22,12)) outputColumnNames: _col0 - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2318,22 +2318,22 @@ STAGE PLANS: Map Operator Tree: TableScan alias: decimal_udf - Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: decimal(20,10)), value (type: int) 
                  outputColumnNames: key, value
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: stddev(key), variance(key)
                    keys: value (type: int)
                    mode: hash
                    outputColumnNames: _col0, _col1, _col2
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
                      Map-reduce partition columns: _col0 (type: int)
-                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: struct), _col2 (type: struct)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2345,10 +2345,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2405,22 +2405,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: decimal(20,10)), value (type: int)
                    outputColumnNames: key, value
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: stddev_samp(key), var_samp(key)
                      keys: value (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: int)
-                       Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 14 Data size: 2296 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col1 (type: struct), _col2 (type: struct)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2432,10 +2432,10 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 14 Data size: 280 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2492,19 +2492,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: decimal(20,10))
                    outputColumnNames: _col0
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: histogram_numeric(_col0, 3)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order:
-                       Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: array)
            Execution mode: llap
            LLAP IO: all inputs
@@ -2515,10 +2515,10 @@ STAGE PLANS:
                aggregations: histogram_numeric(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2559,19 +2559,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: decimal(20,10))
                    outputColumnNames: key
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: min(key)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order:
-                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: decimal(20,10))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2582,10 +2582,10 @@ STAGE PLANS:
                aggregations: min(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2626,19 +2626,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: decimal(20,10))
                    outputColumnNames: key
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: max(key)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order:
-                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: decimal(20,10))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2649,10 +2649,10 @@ STAGE PLANS:
                aggregations: max(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2693,19 +2693,19 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: key (type: decimal(20,10))
                    outputColumnNames: key
-                   Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: count(key)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        sort order:
-                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -2716,10 +2716,10 @@ STAGE PLANS:
                aggregations: count(VALUE._col0)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
index e00de78..da4939e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_decimal_udf2.q.out
@@ -73,7 +73,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf2
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4256 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -83,7 +83,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean
                    predicate: (key = 10) (type: boolean)
-                   Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: NaN (type: double), NaN (type: double), 1.4711276743037347 (type: double), -0.8390715290764524 (type: double), -0.5440211108893698 (type: double), 0.6483608274590866 (type: double), 0.17453292519943295 (type: double)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
@@ -92,13 +92,13 @@ STAGE PLANS:
                          native: true
                          projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8]
                          selectExpressions: ConstantVectorExpression(val NaN) -> 2:double, ConstantVectorExpression(val NaN) -> 3:double, ConstantVectorExpression(val 1.4711276743037347) -> 4:double, ConstantVectorExpression(val -0.8390715290764524) -> 5:double, ConstantVectorExpression(val -0.5440211108893698) -> 6:double, ConstantVectorExpression(val 0.6483608274590866) -> 7:double, ConstantVectorExpression(val 0.17453292519943295) -> 8:double
-                     Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
-                       Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -162,7 +162,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: decimal_udf2
-                 Statistics: Num rows: 38 Data size: 4296 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 38 Data size: 4408 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -172,7 +172,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterDecimalColEqualDecimalScalar(col 0, val 10) -> boolean
                    predicate: (key = 10) (type: boolean)
-                   Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: 22026.465794806718 (type: double), 2.302585092994046 (type: double), 2.302585092994046 (type: double), 1.0 (type: double), log(10, value) (type: double), log(value, 10) (type: double), 1.0 (type: double), 3.1622776601683795 (type: double)
                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
@@ -181,13 +181,13 @@ STAGE PLANS:
                          native: true
                          projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9]
                          selectExpressions: ConstantVectorExpression(val 22026.465794806718) -> 2:double, ConstantVectorExpression(val 2.302585092994046) -> 3:double, ConstantVectorExpression(val 2.302585092994046) -> 4:double, ConstantVectorExpression(val 1.0) -> 5:double, FuncLogWithBaseLongToDouble(col 1) -> 6:double, VectorUDFAdaptor(log(value, 10)) -> 7:double, ConstantVectorExpression(val 1.0) -> 8:double, ConstantVectorExpression(val 3.1622776601683795) -> 9:double
-                     Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
-                       Statistics: Num rows: 19 Data size: 2148 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
index 3b3b801..eb8717e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
@@ -35,6 +35,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -43,6 +44,8 @@ STAGE PLANS:
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
         Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -143,6 +146,60 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string)
+                 outputColumnNames: c1
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0]
+                 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   sort order:
+                   Map-reduce partition columns: rand() (type: double)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                   value expressions: c1 (type: string)
+       Reducer 4
+           Execution mode: llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+               vectorized: false
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0, 16)
+               mode: partial1
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 sort order:
+                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                 value expressions: _col0 (type: struct)
+       Reducer 5
+           Execution mode: llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+               vectorized: false
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0)
+               mode: final
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -160,6 +217,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
 PREHOOK: query: FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
index 7e21493..88f84cd 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
@@ -35,6 +35,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-1
@@ -43,6 +44,8 @@ STAGE PLANS:
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+       Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
      Vertices:
        Map 1
@@ -143,6 +146,60 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.dest1
+               Select Operator
+                 expressions: _col0 (type: string)
+                 outputColumnNames: c1
+                 Select Vectorization:
+                     className: VectorSelectOperator
+                     native: true
+                     projectedOutputColumns: [0]
+                 Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                 Reduce Output Operator
+                   sort order:
+                   Map-reduce partition columns: rand() (type: double)
+                   Reduce Sink Vectorization:
+                       className: VectorReduceSinkObjectHashOperator
+                       native: true
+                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                   Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE
+                   value expressions: c1 (type: string)
+       Reducer 4
+           Execution mode: llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+               vectorized: false
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0, 16)
+               mode: partial1
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+               Reduce Output Operator
+                 sort order:
+                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                 value expressions: _col0 (type: struct)
+       Reducer 5
+           Execution mode: llap
+           Reduce Vectorization:
+               enabled: true
+               enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+               notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+               vectorized: false
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0)
+               mode: final
+               outputColumnNames: _col0
+               Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -160,6 +217,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
 PREHOOK: query: FROM srcorc
 INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1)
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
index 2fa1efe..9c04e2d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_cube1.q.out
@@ -594,6 +594,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -604,8 +606,10 @@ STAGE PLANS:
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-       Reducer 4 <- Map 1 (SIMPLE_EDGE)
-       Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+       Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 5 <- Map 1 (SIMPLE_EDGE)
+       Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+       Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
      Vertices:
        Map 1
@@ -684,10 +688,38 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.t2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                 outputColumnNames: key1, key2, val
+                 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 4
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: final
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 5
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
                aggregations: sum(VALUE._col0)
                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
                mode: partials
@@ -699,7 +731,7 @@ STAGE PLANS:
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                  Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col3 (type: bigint)
-       Reducer 5
+       Reducer 6
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
@@ -721,6 +753,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.t3
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                 outputColumnNames: key1, key2, val
+                 Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+       Reducer 7
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: final
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -738,6 +798,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t2
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t3
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
index 4deef94..83e2f6e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out
@@ -381,22 +381,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: a (type: string), b (type: string), (c + d) (type: int)
                    outputColumnNames: _col0, _col1, _col2
-                   Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: sum(_col2)
                      keys: _col0 (type: string), _col1 (type: string)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string), _col1 (type: string)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                       Statistics: Num rows: 6 Data size: 1068 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: bigint)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -408,12 +408,12 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string), 0 (type: int)
                mode: partials
                outputColumnNames: _col0, _col1, _col2, _col3
-               Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
                  sort order: +++
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int)
-                 Statistics: Num rows: 24 Data size: 4272 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
                  value expressions: _col3 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -423,15 +423,15 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
                mode: final
                outputColumnNames: _col0, _col1, _col3
-               Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 12 Data size: 2184 Basic stats: COMPLETE Column stats: COMPLETE
                pruneGroupingSetId: true
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: bigint)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
-                   Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 12 Data size: 2136 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
index 678db83..4eda3a7 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out
@@ -50,22 +50,22 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t
-                 Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: category (type: int), live (type: int), comments (type: int)
                    outputColumnNames: category, live, comments
-                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: max(live), max(comments)
                      keys: category (type: int), 0 (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2, _col3
-                     Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
                        Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
-                       Statistics: Num rows: 6 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: int), _col3 (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -77,16 +77,16 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col2, _col3
-               Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
                pruneGroupingSetId: true
                Filter Operator
                  predicate: (_col3 > 0) (type: boolean)
-                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: int), _col3 (type: int)
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: int)
-                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    value expressions: _col2 (type: int)
        Reducer 3
            Execution mode: llap
@@ -94,7 +94,7 @@ STAGE PLANS:
              Select Operator
                expressions: KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int), KEY.reducesinkkey1 (type: int)
                outputColumnNames: _col0, _col2, _col3
-               Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                PTF Operator
                  Function definitions:
                      Input definition
@@ -115,14 +115,14 @@ STAGE PLANS:
                            window function: GenericUDAFRankEvaluator
                            window frame: PRECEDING(MAX)~FOLLOWING(MAX)
                            isPivotResult: true
-                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), rank_window_0 (type: int)
                    outputColumnNames: _col0, _col1, _col2, _col3
-                   Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                    File Output Operator
                      compressed: false
-                     Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                      table:
                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
index 02fc90d..18a0f08 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_reduce.q.out
@@ -247,7 +247,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: store_sales
-                 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
@@ -258,7 +258,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [9]
-                   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      Group By Vectorization:
                          className: VectorGroupByOperator
@@ -269,7 +269,7 @@ STAGE PLANS:
                      keys: ss_ticket_number (type: int)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
@@ -279,7 +279,7 @@ STAGE PLANS:
                          native: false
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                          nativeConditionsNotMet: No TopN IS false
-                       Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                        TopN Hash Memory Usage: 0.1
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -311,19 +311,19 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                Limit
                  Number of rows: 20
                  Limit Vectorization:
                      className: VectorLimitOperator
                      native: true
-                 Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                  File Output Operator
                    compressed: false
                    File Sink Vectorization:
                        className: VectorFileSinkOperator
                        native: false
-                   Statistics: Num rows: 20 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
                    table:
                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -418,7 +418,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: store_sales
-                 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
@@ -429,7 +429,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [9]
-                   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 4000 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      Group By Vectorization:
                          className: VectorGroupByOperator
@@ -440,7 +440,7 @@ STAGE PLANS:
                      keys: ss_ticket_number (type: int)
                      mode: hash
                      outputColumnNames: _col0
-                     Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
@@ -449,7 +449,7 @@ STAGE PLANS:
                          className: VectorReduceSinkLongOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -480,7 +480,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int)
                mode: mergepartial
                outputColumnNames: _col0
-               Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: min(_col0)
                  Group By Vectorization:
@@ -493,7 +493,7 @@ STAGE PLANS:
                  keys: _col0 (type: int)
                  mode: complete
                  outputColumnNames: _col0, _col1
-                 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 47 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: _col1 (type: int)
                    outputColumnNames: _col0
@@ -501,7 +501,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [1]
-                   Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
@@ -509,7 +509,7 @@ STAGE PLANS:
                        className: VectorReduceSinkObjectHashOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                     Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
        Reducer 3
            Execution mode: vectorized, llap
            Reduce Vectorization:
@@ -527,13 +527,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumns: [0]
-               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 47 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -698,7 +698,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: store_sales
-                 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
@@ -709,7 +709,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [2, 9, 10]
-                   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: min(ss_quantity)
                      Group By Vectorization:
@@ -722,7 +722,7 @@ STAGE PLANS:
                      keys: ss_ticket_number (type: int), ss_item_sk (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
@@ -731,7 +731,7 @@ STAGE PLANS:
                          className: VectorReduceSinkObjectHashOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -765,7 +765,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int)
                  outputColumnNames: _col0, _col1, _col2
@@ -773,7 +773,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumns: [1, 0, 2]
-                 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: sum(_col0), sum(_col2)
                    Group By Vectorization:
@@ -786,7 +786,7 @@ STAGE PLANS:
                    keys: _col1 (type: int)
                    mode: complete
                    outputColumnNames: _col0, _col1, _col2
-                   Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int)
                      sort order: +
@@ -794,7 +794,7 @@ STAGE PLANS:
                        className: VectorReduceSinkObjectHashOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                     Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col1 (type: bigint), _col2 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -813,13 +813,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumns: [0, 1, 2]
-               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 47 Data size: 940 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -984,7 +984,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: store_sales
-                 Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
@@ -995,7 +995,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [2, 9, 10]
-                   Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1000 Data size: 11816 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: min(ss_quantity)
                      Group By Vectorization:
@@ -1008,7 +1008,7 @@ STAGE PLANS:
                      keys: ss_ticket_number (type: int), ss_item_sk (type: int)
                      mode: hash
                      outputColumnNames: _col0, _col1, _col2
-                     Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int), _col1 (type: int)
                        sort order: ++
@@ -1017,7 +1017,7 @@ STAGE PLANS:
                          className: VectorReduceSinkMultiKeyOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1000 Data size: 88276 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col2 (type: int)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -1051,7 +1051,7 @@ STAGE PLANS:
                keys: KEY._col0 (type: int), KEY._col1 (type: int)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int)
                  outputColumnNames: _col0, _col1, _col2
@@ -1059,7 +1059,7 @@ STAGE PLANS:
                      className: VectorSelectOperator
                      native: true
                      projectedOutputColumns: [1, 0, 2]
-                 Statistics: Num rows: 500 Data size: 44138 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 6000 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
                    aggregations: sum(_col2)
                    Group By Vectorization:
@@ -1072,7 +1072,7 @@ STAGE PLANS:
                    keys: _col1 (type: int), _col0 (type: int)
                    mode: complete
                    outputColumnNames: _col0, _col1, _col2
-                   Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      key expressions: _col0 (type: int), _col1 (type: int)
                      sort order: ++
@@ -1080,7 +1080,7 @@ STAGE PLANS:
                        className: VectorReduceSinkObjectHashOperator
                        native: true
                        nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                     Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col2 (type: bigint)
        Reducer 3
            Execution mode: vectorized, llap
@@ -1099,13 +1099,13 @@ STAGE PLANS:
                    className: VectorSelectOperator
                    native: true
                    projectedOutputColumns: [0, 1, 2]
-               Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
-                 Statistics: Num rows: 250 Data size: 22069 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 500 Data size: 8000 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
index e737f0b..dd144b4 100644
--- a/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out
@@ -431,6 +431,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -441,8 +443,10 @@ STAGE PLANS:
      Edges:
        Reducer 2 <- Map 1 (SIMPLE_EDGE)
        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-       Reducer 4 <- Map 1 (SIMPLE_EDGE)
-       Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+       Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+       Reducer 5 <- Map 1 (SIMPLE_EDGE)
+       Reducer 6 <- Reducer 5 (SIMPLE_EDGE)
+       Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
      Vertices:
        Map 1
@@ -500,7 +504,7 @@ STAGE PLANS:
                  Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col3 (type: bigint)
        Reducer 3
-           Execution mode: vectorized, llap
+           Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: count(VALUE._col0)
@@ -521,7 +525,35 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.t2
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                 outputColumnNames: key1, key2, val
+                 Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
        Reducer 4
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: final
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+       Reducer 5
            Execution mode: vectorized, llap
            Reduce Operator Tree:
              Group By Operator
@@ -536,8 +568,8 @@ STAGE PLANS:
                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                  Statistics: Num rows: 18 Data size: 3078 Basic stats: COMPLETE Column stats: NONE
                  value expressions: _col3 (type: bigint)
-       Reducer 5
-           Execution mode: vectorized, llap
+       Reducer 6
+           Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
                aggregations: sum(VALUE._col0)
@@ -558,6 +590,34 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                      serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                      name: default.t3
+               Select Operator
+                 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+                 outputColumnNames: key1, key2, val
+                 Statistics: Num rows: 9 Data size: 1539 Basic stats: COMPLETE Column stats: NONE
+                 Group By Operator
+                   aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16)
+                   mode: hash
+                   outputColumnNames: _col0, _col1, _col2
+                   Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                   Reduce Output Operator
+                     sort order:
+                     Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+                     value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+       Reducer 7
+           Execution mode: llap
+           Reduce Operator Tree:
+             Group By Operator
+               aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+               mode: final
+               outputColumnNames: _col0, _col1, _col2
+               Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+               File Output Operator
+                 compressed: false
+                 Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+                 table:
+                     input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                     output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                     serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
@@ -575,6 +635,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t2
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key1, key2, val
+          Column Types: string, string, int
+          Table: default.t3
+
   Stage: Stage-1
     Move Operator
      tables:
diff --git a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
index fc7b9a3..da93871 100644
--- a/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_inner_join.q.out
@@ -54,7 +54,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0]
@@ -64,7 +64,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: c (type: int)
                      outputColumnNames: _col0
@@ -72,7 +72,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0]
-                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -89,7 +89,7 @@ STAGE PLANS:
                        outputColumnNames: _col1
                        input vertices:
                          1 Map 2
-                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                        Select Operator
                          expressions: _col1 (type: int)
                          outputColumnNames: _col0
@@ -97,13 +97,13 @@ STAGE PLANS:
                            className: VectorSelectOperator
                            native: true
                            projectedOutputColumns: [0]
-                         Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                          File Output Operator
                            compressed: false
                            File Sink Vectorization:
                                className: VectorFileSinkOperator
                                native: false
-                           Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -127,7 +127,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t1
-                 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0]
@@ -137,7 +137,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: a (type: int)
                      outputColumnNames: _col0
@@ -145,7 +145,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0]
-                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: int)
                        sort order: +
@@ -154,7 +154,7 @@ STAGE PLANS:
                          className: VectorReduceSinkLongOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -214,7 +214,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0]
@@ -224,7 +224,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: c (type: int)
                      outputColumnNames: _col0
@@ -232,7 +232,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0]
-                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Left Semi Join 0 to 1
@@ -250,13 +250,13 @@ STAGE PLANS:
                        outputColumnNames: _col0
                        input vertices:
                          1 Map 2
-                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                        File Output Operator
                          compressed: false
                          File Sink Vectorization:
                              className: VectorFileSinkOperator
                              native: false
-                         Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -280,7 +280,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t1
-                 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0]
@@ -290,7 +290,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: a (type: int)
                      outputColumnNames: _col0
@@ -298,7 +298,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0]
-                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Group By Operator
                        Group By Vectorization:
                            className: VectorGroupByOperator
@@ -309,7 +309,7 @@ STAGE PLANS:
                        keys: _col0 (type: int)
                        mode: hash
                        outputColumnNames: _col0
-                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                        Reduce Output Operator
                          key expressions: _col0 (type: int)
                          sort order: +
@@ -318,7 +318,7 @@ STAGE PLANS:
                            className: VectorReduceSinkLongOperator
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
@@ -410,7 +410,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -420,7 +420,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: c (type: int)
                      outputColumnNames: _col0
@@ -428,7 +428,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -446,7 +446,7 @@ STAGE PLANS:
                        outputColumnNames: _col1, _col2
                        input vertices:
                          1 Map 2
-                       Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                        Select Operator
                          expressions: _col1 (type: string), _col2 (type: int)
                          outputColumnNames: _col0, _col1
@@ -454,13 +454,13 @@ STAGE PLANS:
                            className: VectorSelectOperator
                            native: true
                            projectedOutputColumns: [2, 0]
-                         Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                          File Output Operator
                            compressed: false
                            File Sink Vectorization:
                                className: VectorFileSinkOperator
                                native: false
-                           Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -485,7 +485,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t1
-                 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -495,7 +495,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
@@ -503,7 +503,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0, 1]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
@@ -512,7 +512,7 @@ STAGE PLANS:
                          className: VectorReduceSinkLongOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -573,7 +573,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t1
-                 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -583,7 +583,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
@@ -591,7 +591,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0, 1]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
@@ -600,7 +600,7 @@ STAGE PLANS:
                          className: VectorReduceSinkLongOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -621,7 +621,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -631,7 +631,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
@@ -639,7 +639,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0, 1]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -658,13 +658,13 @@ STAGE PLANS:
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
                          0 Map 1
-                       Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                        File Output Operator
                          compressed: false
                          File Sink Vectorization:
                              className: VectorFileSinkOperator
                              native: false
-                         Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                          table:
                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -729,7 +729,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -739,7 +739,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: c (type: int), v2 (type: string)
                      outputColumnNames: _col0, _col1
@@ -747,7 +747,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0, 1]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                      Map Join Operator
                        condition map:
                             Inner Join 0 to 1
@@ -766,7 +766,7 @@ STAGE PLANS:
                        outputColumnNames: _col0, _col1, _col2, _col3
                        input vertices:
                          1 Map 2
-                       Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                        Select Operator
                          expressions: _col2 (type: string), (_col3 * 2) (type: int), (_col0 * 5) (type: int), _col1 (type: string)
                          outputColumnNames: _col0, _col1, _col2, _col3
@@ -775,13 +775,13 @@ STAGE PLANS:
                            native: true
                            projectedOutputColumns: [2, 3, 4, 1]
                            selectExpressions: LongColMultiplyLongScalar(col 0, val 2) -> 3:long, LongColMultiplyLongScalar(col 0, val 5) -> 4:long
-                         Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                         Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                          File Output Operator
                            compressed: false
                            File Sink Vectorization:
                                className: VectorFileSinkOperator
                                native: false
-                           Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+                           Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
                            table:
                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -806,7 +806,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t1
-                 Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -816,7 +816,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean
                    predicate: (a > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                    Select Operator
                      expressions: v1 (type: string), a (type: int)
                      outputColumnNames: _col0, _col1
@@ -824,7 +824,7 @@ STAGE PLANS:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumns: [0, 1]
-                     Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                     Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col1 (type: int)
                        sort order: +
@@ -833,7 +833,7 @@ STAGE PLANS:
                          className: VectorReduceSinkLongOperator
                          native: true
                          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                       Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                       Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: string)
            Execution mode: vectorized, llap
            LLAP IO: all inputs
@@ -894,7 +894,7 @@ STAGE PLANS:
            Map Operator Tree:
                TableScan
                  alias: t2
-                 Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      projectedOutputColumns: [0, 1]
@@ -904,7 +904,7 @@ STAGE PLANS:
                        native: true
                        predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean
                    predicate: (c > 2) (type: boolean)
-                   Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num
rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -912,7 +912,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -931,7 +931,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col0 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -939,13 +939,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 1, 0] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -970,7 +970,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -980,7 +980,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -988,7 +988,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -997,7 +997,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1058,7 +1058,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true 
projectedOutputColumns: [0, 1] @@ -1068,7 +1068,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1076,7 +1076,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1095,7 +1095,7 @@ STAGE PLANS: outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: int), _col2 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -1103,13 +1103,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2, 1] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1134,7 +1134,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1144,7 +1144,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1152,7 +1152,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1161,7 +1161,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all 
inputs @@ -1222,7 +1222,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1232,7 +1232,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1240,7 +1240,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1249,7 +1249,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1270,7 +1270,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1280,7 +1280,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1288,7 +1288,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1307,7 +1307,7 @@ STAGE PLANS: outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col3 (type: string), _col2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -1315,13 +1315,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 1, 0] - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num 
rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1386,7 +1386,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 4 Data size: 364 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1396,7 +1396,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 1, val 2) -> boolean predicate: (a > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: v1 (type: string), a (type: int) outputColumnNames: _col0, _col1 @@ -1404,7 +1404,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + @@ -1413,7 +1413,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1434,7 +1434,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 5 Data size: 460 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1444,7 +1444,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 2) -> boolean predicate: (c > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c (type: int), v2 (type: string) outputColumnNames: _col0, _col1 @@ -1452,7 +1452,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 91 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -1471,7 +1471,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: int), _col0 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1, _col2 @@ -1479,13 +1479,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 2, 1] - Statistics: 
Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index c9b794e..318201f 100644 --- a/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -102,11 +102,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -119,14 +119,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -137,16 +137,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: llap LLAP IO: all inputs @@ -196,11 +196,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column 
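[Annotation] The hunks above and below flip operator annotations from "Column stats: NONE" to "Column stats: COMPLETE", and the Data size figures move with them: with complete column statistics Hive sizes rows from per-type widths rather than raw file bytes, so the three-INT-column tjoin1 is estimated at 3 rows x 3 cols x 4 bytes = 36, and the earlier single-INT scans at 4 bytes per row (t1: 4 rows -> 16; t2, pruned to its INT key: 5 rows -> 20). A minimal sketch of how to gather and inspect the column statistics behind these estimates; tjoin1 and rnum are names taken from the hunk above, and the statements and config are standard HiveQL:

    -- collect per-column min/max/NDV/average-length statistics
    ANALYZE TABLE tjoin1 COMPUTE STATISTICS FOR COLUMNS;
    -- display the stats for one column (the inputs to the Data size estimate)
    DESCRIBE FORMATTED tjoin1 rnum;
    -- let the optimizer consume column stats when annotating operators
    SET hive.stats.fetch.column.stats=true;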
stats: COMPLETE Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -213,14 +213,14 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -231,16 +231,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: llap LLAP IO: all inputs @@ -290,7 +290,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -301,7 +301,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -319,7 +319,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -327,13 +327,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output 
Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -352,7 +352,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -363,7 +363,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -372,7 +372,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -430,7 +430,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -441,7 +441,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -459,7 +459,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -467,13 +467,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -492,7 +492,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 
Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -503,7 +503,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -512,7 +512,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -570,7 +570,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -581,7 +581,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -598,7 +598,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -606,13 +606,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -631,7 +631,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -642,7 +642,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: int) sort order: + @@ -651,7 +651,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -709,7 +709,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin1 - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -720,7 +720,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -737,7 +737,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) outputColumnNames: _col0, _col1, _col2, _col3 @@ -745,13 +745,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3] - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4 Data size: 409 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 588 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -770,7 +770,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: tjoin2 - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -781,7 +781,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [1, 2] - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -790,7 +790,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4 Data size: 372 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 
360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out index d537297..2318d8c 100644 --- a/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_multi_insert.q.out @@ -84,6 +84,9 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -93,6 +96,11 @@ STAGE PLANS: Stage: Stage-3 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -114,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -129,6 +150,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -144,16 +178,87 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + 
Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -171,6 +276,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 + Stage: Stage-1 
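[Annotation] The vector_multi_insert hunks above show the cost side of stats autogathering: compute_stats is not a vectorized UDAF, so Map 1 drops from "vectorized, llap" to plain "llap" (the notVectorizedReason spells this out), and three new reducers, one per target table and each fed over a CUSTOM_SIMPLE_EDGE, merge the partial stats consumed by the Column Stats Work stages 8-10. A hedged sketch of the shape of such a multi-insert; orc_rn1/orc_rn2/orc_rn3, the column rn, and the last two predicates come from the hunks, while the source table name and the first predicate are illustrative:

    -- hive.stats.column.autogather is the switch that adds the compute_stats
    -- branches; setting it to false would restore full vectorization at the
    -- cost of not collecting column stats on insert
    SET hive.stats.column.autogather=true;
    FROM src_table                                   -- hypothetical source
    INSERT OVERWRITE TABLE orc_rn1 SELECT rn WHERE rn < 100
    INSERT OVERWRITE TABLE orc_rn2 SELECT rn WHERE rn >= 100 AND rn < 1000
    INSERT OVERWRITE TABLE orc_rn3 SELECT rn WHERE rn >= 1000;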
Move Operator tables: diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out b/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 5c3f0e0..4b414fe 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -84,7 +84,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -95,7 +95,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join0 to 1 @@ -115,13 +115,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -146,7 +146,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -157,7 +157,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -166,7 +166,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -232,7 +232,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t1 - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -243,7 +243,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: 
_col1 (type: int) sort order: + @@ -252,7 +252,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6 Data size: 544 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -273,7 +273,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: t2 - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -284,7 +284,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 6 Data size: 550 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Right Outer Join0 to 1 @@ -304,13 +304,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6 Data size: 598 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 9 Data size: 1674 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out b/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out index 3e4ae94..aa9d903 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partition_diff_num_cols.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_0 - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -109,7 +109,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -120,14 +120,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -159,13 +159,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -267,7 +267,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_1 - Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -278,7 +278,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 13476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -289,14 +289,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -328,13 +328,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -436,7 +436,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_2a - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 
Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -447,7 +447,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -458,14 +458,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -497,13 +497,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -592,7 +592,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_2b - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -603,7 +603,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 792 Basic stats: COMPLETE Column stats: PARTIAL Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -614,14 +614,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: 
PARTIAL value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -653,13 +653,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -748,7 +748,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: inventory_part_3 - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -759,7 +759,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [3] - Statistics: Num rows: 200 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 200 Data size: 780 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(inv_quantity_on_hand) Group By Vectorization: @@ -770,14 +770,14 @@ STAGE PLANS: projectedOutputColumns: [0] mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -809,13 +809,13 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out index 77abb3b..0ec32ca 100644 --- a/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_partitioned_date_time.q.out @@ -946,7 +946,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 
Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -957,13 +957,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1181,7 +1181,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: flights_tiny_orc_partitioned_date - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] @@ -1192,7 +1192,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5] - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1201,7 +1201,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 137 Data size: 39448 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Execution mode: vectorized, llap @@ -1231,13 +1231,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 137 Data size: 7672 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 137 Data size: 39593 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col4 (type: int), _col5 (type: date) sort order: ++ @@ -1246,7 +1246,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE 
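The plan deltas above flip "Column stats: NONE/PARTIAL" to "COMPLETE" once column statistics exist for every scanned partition. A minimal sketch of how such statistics are produced and surfaced; the fl_date partition column and fl_num ordering key are assumptions inferred from the plan, not stated in this patch:

    -- Gather per-column stats for all partitions, then re-run EXPLAIN;
    -- with hive.stats.fetch.column.stats=true the TableScan reports
    -- "Column stats: COMPLETE" and Data size derived from column metadata.
    SET hive.stats.fetch.column.stats=true;
    ANALYZE TABLE flights_tiny_orc_partitioned_date PARTITION (fl_date)
      COMPUTE STATISTICS FOR COLUMNS;
    EXPLAIN
    SELECT * FROM flights_tiny_orc_partitioned_date
    ORDER BY fl_num, fl_date
    LIMIT 25;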
TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: timestamp), _col3 (type: float) Reducer 3 @@ -1266,19 +1266,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [2, 3, 4, 5, 0, 1] - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 1400 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 25 Data size: 7225 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out index 8b19c58..fcf5625 100644 --- a/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_string_concat.q.out @@ -122,7 +122,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: over1korc - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 101753 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] @@ -134,19 +134,19 @@ STAGE PLANS: native: true projectedOutputColumns: [7, 12, 11] selectExpressions: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 12:String_Family, StringGroupColConcatStringScalar(col 13, val |)(children: StringScalarConcatStringGroupCol(val |, col 11)(children: StringRTrim(col 13)(children: StringGroupColConcatStringScalar(col 11, val )(children: StringScalarConcatStringGroupCol(val , col 7) -> 11:String_Family) -> 13:String_Family) -> 11:String) -> 13:String_Family) -> 11:String_Family - Statistics: Num rows: 1049 Data size: 311170 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1049 Data size: 487785 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 5920 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 9300 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out index d583f09..1a2a3c9 100644 --- a/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out +++ 
b/ql/src/test/results/clientpositive/llap/vector_struct_in.q.out @@ -59,7 +59,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -69,7 +69,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two','3'), const struct('three','1'), const struct('one','1'), const struct('five','2'), const struct('six','1'), const struct('eight','1'), const struct('seven','1'), const struct('nine','1'), const struct('ten','1')) (type: boolean) - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: string), lineid (type: string) outputColumnNames: _col0, _col1 @@ -77,13 +77,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -182,7 +182,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_1 - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -194,13 +194,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, BYTES], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 346 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -315,7 +315,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -325,7 +325,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct(2,3), const struct(3,1), const 
struct(1,1), const struct(5,2), const struct(6,1), const struct(8,1), const struct(7,1), const struct(9,1), const struct(10,1)) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: int), lineid (type: int) outputColumnNames: _col0, _col1 @@ -333,13 +333,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -438,7 +438,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_2 - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -450,13 +450,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [LONG, LONG], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -571,7 +571,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_3 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -581,7 +581,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> boolean predicate: (struct(id,lineid)) IN (const struct('two',3), const struct('three',1), const struct('one',1), const struct('five',2), const struct('six',1), const struct('eight',1), const struct('seven',1), const struct('nine',1), const struct('ten',1)) (type: boolean) - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: id (type: string), lineid (type: int) outputColumnNames: _col0, _col1 @@ -589,13 +589,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE File 
Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -694,7 +694,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_3 - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -706,13 +706,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 3] selectExpressions: StructColumnInList(structExpressions [col 0, col 1], fieldVectorColumnTypes [BYTES, LONG], structColumnMap [0, 1]) -> 3:boolean - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -830,7 +830,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_4 - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -840,7 +840,7 @@ STAGE PLANS: native: true predicateExpression: FilterStructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> boolean predicate: (struct(my_bigint,my_string,my_double)) IN (const struct(1,'a',1.5), const struct(1,'b',-0.5), const struct(3,'b',1.5), const struct(1,'d',1.5), const struct(1,'c',1.5), const struct(1,'b',2.5), const struct(1,'b',0.5), const struct(5,'b',1.5), const struct(1,'a',0.5), const struct(3,'b',1.5)) (type: boolean) - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: my_bigint (type: bigint), my_string (type: string), my_double (type: double) outputColumnNames: _col0, _col1, _col2 @@ -848,13 +848,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2] - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -956,7 +956,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test_4 - Statistics: Num rows: 3 Data size: 303 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2] @@ -968,13 +968,13 @@ STAGE PLANS: native: true projectedOutputColumns: [0, 1, 2, 4] selectExpressions: StructColumnInList(structExpressions [col 0, col 1, col 2], fieldVectorColumnTypes [LONG, BYTES, DOUBLE], structColumnMap [0, 1, 2]) -> 4:boolean - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 303 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 315 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 0dd278e..e4cdcb1 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf1.q.out @@ -57,17 +57,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = UDFToString(concat(c3, c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -125,17 +125,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = UDFToString(upper(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -193,17 +193,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = UDFToString(lower(c4))) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -261,17 +261,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ascii(c2) (type: int), ascii(c4) (type: int), (ascii(c2) = ascii(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -329,17 +329,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: concat_ws('|', c1, c2) (type: string), concat_ws('|', c3, c4) (type: string), (concat_ws('|', c1, c2) = concat_ws('|', c3, c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -397,17 +397,17 @@ STAGE PLANS: Map Operator Tree: 
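With complete column stats, each operator's estimated Data size is recomputed from the output expressions' types (for example, two int outputs plus a boolean give Data size: 12) instead of inheriting the raw scan size. A sketch of the query shape behind these plans, with the varchar_udf_1 schema (c2 string, c4 varchar(20)) inferred from the expressions above rather than taken from this patch:

    -- EXPLAIN shows the Select Operator's Data size tracking the
    -- projected types: fixed-width results shrink the estimate,
    -- while string results use an average-length estimate.
    EXPLAIN
    SELECT lower(c2), lower(c4), (lower(c2) = lower(c4))
    FROM varchar_udf_1
    LIMIT 1;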
TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: decode(encode(c2,'US-ASCII'),'US-ASCII') (type: string), decode(encode(c4,'US-ASCII'),'US-ASCII') (type: string), (decode(encode(c2,'US-ASCII'),'US-ASCII') = decode(encode(c4,'US-ASCII'),'US-ASCII')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -465,17 +465,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: instr(c2, '_') (type: int), instr(c4, '_') (type: int), (instr(c2, '_') = instr(c4, '_')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -533,17 +533,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: replace(c1, '_', c2) (type: string), replace(c3, '_', c4) (type: string), (replace(c1, '_', c2) = replace(c3, '_', c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -601,17 +601,17 @@ STAGE PLANS: Map Operator Tree: 
TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -669,17 +669,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: next_day(d1, 'TU') (type: string), next_day(d4, 'WE') (type: string), (next_day(d1, 'TU') = next_day(d4, 'WE')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -737,17 +737,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: months_between(d1, d3) (type: double), months_between(d2, d4) (type: double), (months_between(d1, d3) = months_between(d2, d4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -805,17 +805,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -941,17 +941,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: lpad(c2, 15, ' ') (type: string), lpad(c4, 15, ' ') (type: string), (lpad(c2, 15, ' ') = lpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1009,17 +1009,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1077,17 +1077,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 regexp 
'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1145,17 +1145,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1213,17 +1213,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1281,17 +1281,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: 
COMPLETE Column stats: COMPLETE Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1349,17 +1349,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rpad(c2, 15, ' ') (type: string), rpad(c4, 15, ' ') (type: string), (rpad(c2, 15, ' ') = rpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1417,17 +1417,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1547,17 +1547,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: split(c2, '_') (type: array), split(c4, '_') (type: array) outputColumnNames: 
_col0, _col1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1677,17 +1677,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1745,17 +1745,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1814,19 +1814,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: _col0, _col2 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
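The map-side Select over c2/c4 feeding compute_stats below is the shape that ANALYZE ... FOR COLUMNS compiles to, and the same pattern is what the new Column Stats Work stages in this patch attach to INSERT queries. A hedged sketch: the autogather setting is an assumption about how these golden files were regenerated, and the literal 16 in compute_stats(col, 16) is, per my understanding, the bit-vector width for the NDV estimate, not something configured here:

    -- Explicit per-column statistics for the two columns in the plan:
    ANALYZE TABLE varchar_udf_1 COMPUTE STATISTICS FOR COLUMNS c2, c4;
    -- With autogather on, INSERT OVERWRITE plans grow the extra
    -- compute_stats reducer and Column Stats Work stage seen above:
    SET hive.stats.column.autogather=true;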
aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs @@ -1837,10 +1837,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1895,19 +1895,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(c2), min(c4) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1918,10 +1918,10 @@ STAGE PLANS: aggregations: min(VALUE._col0), min(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1976,19 +1976,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: varchar_udf_1 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 - Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator 
aggregations: max(c2), max(c4) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1999,10 +1999,10 @@ STAGE PLANS: aggregations: max(VALUE._col0), max(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out index 559a82b..c88a6fc 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf_character_length.q.out @@ -47,11 +47,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -70,8 +74,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -89,6 +121,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + PREHOOK: query: 
FROM src1 INSERT OVERWRITE TABLE dest1 SELECT character_length(src1.value) PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git a/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out b/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out index cee8322..b1037c4 100644 --- a/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_udf_octet_length.q.out @@ -30,11 +30,15 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: @@ -53,8 +57,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized, llap + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Dependency Collection @@ -72,6 +104,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src1 INSERT OVERWRITE TABLE dest1 SELECT octet_length(src1.value) PREHOOK: type: QUERY PREHOOK: Input: default@src1 diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out index 5979f8b..83f7624 100644 --- a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out @@ -136,50 +136,74 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), 
diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
index 5979f8b..83f7624 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_4.q.out
@@ -136,50 +136,74 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: vectortab2korc
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
- TableScan Vectorization:
- native: true
- projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
Select Operator
expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
- selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
name: default.varchar_lazy_binary_columnar
- Execution mode: vectorized, llap
+ Select Operator
+ expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50))
+ outputColumnNames: vt, vsi, vi, vb, vf, vd, vs
+ Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
+ Execution mode: llap
LLAP IO: all inputs
Map Vectorization:
enabled: true
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
- usesVectorUDFAdaptor: true
- vectorized: true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reducer 2
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -197,3 +221,10 @@
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: vt, vsi, vi, vb, vf, vd, vs
+ Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50)
+ Table: default.varchar_lazy_binary_columnar
+
diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index b219869..a217c4c 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -75,15 +75,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_2
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: varchar(10)), value (type: varchar(20))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: varchar(10))
sort order: +
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: varchar(20))
Execution mode: vectorized, llap
@@ -109,13 +109,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 5
- Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -195,15 +195,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: varchar_2
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: varchar(10)), value (type: varchar(20))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: varchar(10))
sort order: -
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
value expressions: _col1 (type: varchar(20))
Execution mode: vectorized, llap
@@ -229,13 +229,13 @@ STAGE PLANS:
Select Operator
expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: varchar(20))
outputColumnNames: _col0, _col1
- Statistics: Num rows: 500 Data size: 88000 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 5
- Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 5 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 890 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -301,6 +301,7 @@ STAGE DEPENDENCIES:
Stage-2 depends on stages: Stage-1
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-3
STAGE PLANS:
Stage: Stage-1
@@ -308,6 +309,7 @@ STAGE PLANS:
#### A masked pattern was here ####
Edges:
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
@@ -335,12 +337,10 @@ STAGE PLANS:
Reduce Output Operator
sort order:
Reduce Sink Vectorization:
- className: VectorReduceSinkOperator
- native: false
- nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- nativeConditionsNotMet: No TopN IS false
+ className: VectorReduceSinkObjectHashOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
- TopN Hash Memory Usage: 0.1
value expressions: _col0 (type: int)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -349,53 +349,69 @@ STAGE PLANS:
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
groupByVectorOutput: true
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: false
+ allNative: true
usesVectorUDFAdaptor: false
vectorized: true
Reducer 2
- Execution mode: vectorized, llap
+ Execution mode: llap
Reduce Vectorization:
enabled: true
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
- groupByVectorOutput: true
- allNative: false
- usesVectorUDFAdaptor: false
- vectorized: true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int)
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [0]
Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Limit Vectorization:
- className: VectorLimitOperator
- native: true
Statistics: Num rows: 10 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
outputColumnNames: _col0
- Select Vectorization:
- className: VectorSelectOperator
- native: true
- projectedOutputColumns: [1]
- selectExpressions: CastLongToVarChar(col 0, maxLength 25) -> 1:VarChar
Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- File Sink Vectorization:
- className: VectorFileSinkOperator
- native: false
Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
name: default.varchar_3
+ Select Operator
+ expressions: _col0 (type: varchar(25))
+ outputColumnNames: field
+ Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: compute_stats(field, 16)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: struct)
+ Reducer 3
+ Execution mode: llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+ vectorized: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-2
Dependency Collection
@@ -413,6 +429,13 @@
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: field
+ Column Types: varchar(25)
+ Table: default.varchar_3
+
PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
PREHOOK: type: QUERY
PREHOOK: Input: default@alltypesorc
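NOTE: The stats pipeline is built on the internal compute_stats UDAF, and the vertex summaries above show it has no vectorized implementation ("notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported"), so vertices that host it drop from "vectorized, llap" to plain "llap". A sketch for reproducing the annotation, using the query from the test above (EXPLAIN VECTORIZATION is assumed to be available in this Hive build):

  EXPLAIN VECTORIZATION DETAIL
  insert into table varchar_3 select cint from alltypesorc limit 10;
  -- the reducer that merges the compute_stats partials should report vectorized: false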
diff --git a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
index 8104f3e..6b52c4b 100644
--- a/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_when_case_null.q.out
@@ -40,7 +40,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: count_case_groupby
- Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -52,7 +52,7 @@
native: true
projectedOutputColumns: [0, 3]
selectExpressions: VectorUDFAdaptor(CASE WHEN (bool) THEN (1) WHEN ((not bool)) THEN (0) ELSE (null) END)(children: NotCol(col 1) -> 2:boolean) -> 3:int
- Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 456 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count(_col1)
Group By Vectorization:
@@ -65,7 +65,7 @@
keys: _col0 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
@@ -74,7 +74,7 @@
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 5 Data size: 452 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -108,13 +108,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 2 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
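NOTE: The hash-side Group By estimate above drops from 5 rows to 2 because, with COMPLETE column statistics, the optimizer can cap grouping output at the key's distinct-value count instead of the input row count: min(5 input rows, NDV(key)) = 2 here (the NDV of 2 is inferred from the plan, not shown directly). A sketch for refreshing the statistics that feed this estimate:

  -- recompute stats for all columns of the grouped table
  ANALYZE TABLE count_case_groupby COMPUTE STATISTICS FOR COLUMNS;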
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
index 872e7f3..b99598a 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_part_project.q.out
@@ -70,15 +70,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: alltypesorc_part
- Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1592 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: (cdouble + 2.0) (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
- Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
TopN Hash Memory Usage: 0.1
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -103,13 +103,13 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: double)
outputColumnNames: _col0
- Statistics: Num rows: 200 Data size: 56096 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 200 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
- Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 10 Data size: 2800 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
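NOTE: With COMPLETE column statistics the planner sizes rows from the columns actually referenced rather than the table's raw-data heuristic: 200 rows x 8 bytes per double gives the 1600 bytes after the Select above (1592 at the TableScan, plausibly a null adjustment), versus 56096 before. Once gathered, per-column statistics can be inspected directly; a sketch against the table above:

  DESCRIBE FORMATTED alltypesorc_part cdouble;
  -- prints min/max/num_nulls/distinct_count once column stats exist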
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 3f0afcb..999b667 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -120,7 +120,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -130,7 +130,7 @@
native: true
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
@@ -139,7 +139,7 @@
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -155,7 +155,7 @@
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -165,7 +165,7 @@
native: true
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
@@ -174,7 +174,7 @@
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -196,14 +196,14 @@
0 key (type: int)
1 key (type: int)
outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -255,7 +255,7 @@
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -265,7 +265,7 @@
native: true
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
@@ -274,7 +274,7 @@
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -290,15 +290,15 @@
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
Map-reduce partition columns: key (type: int)
- Statistics: Num rows: 2 Data size: 50 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: llap
LLAP IO: no inputs
@@ -316,14 +316,14 @@
0 key (type: int)
1 key (type: int)
outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -375,7 +375,7 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: a
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -385,7 +385,7 @@
native: true
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
@@ -394,7 +394,7 @@
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -410,7 +410,7 @@
Map Operator Tree:
TableScan
alias: b
- Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
TableScan Vectorization:
native: true
projectedOutputColumns: [0, 1]
@@ -420,7 +420,7 @@
native: true
predicateExpression: SelectColumnIsNotNull(col 0) -> boolean
predicate: key is not null (type: boolean)
- Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: key (type: int)
sort order: +
@@ -429,7 +429,7 @@
className: VectorReduceSinkLongOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 2 Data size: 52 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2 Data size: 208 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: value (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -451,14 +451,14 @@
0 key (type: int)
1 key (type: int)
outputColumnNames: _col0, _col1, _col5, _col6
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 2 Data size: 228 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 832 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
index 855a50f..1f5a8ff 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_context.q.out
@@ -109,79 +109,51 @@ STAGE PLANS:
Tez
#### A masked pattern was here ####
Edges:
- Map 2 <- Map 1 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
+ Map 1 <- Map 2 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE)
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: store_sales
- Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean)
- Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: +
- Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
- value expressions: _col1 (type: int), _col2 (type: double)
- Execution mode: vectorized, llap
- LLAP IO: all inputs
- Map Vectorization:
- enabled: true
- enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
- groupByVectorOutput: true
- inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
- allNative: true
- usesVectorUDFAdaptor: false
- vectorized: true
- Map 2
- Map Operator Tree:
- TableScan
- alias: store
- Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: s_store_sk is not null (type: boolean)
- Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: s_store_sk (type: int), s_city (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 72744 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col0 (type: int)
+ 0 _col1 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col1, _col2, _col4
+ outputColumnNames: _col0, _col2
input vertices:
- 0 Map 1
- Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE
+ 1 Map 2
+ Statistics: Num rows: 6391 Data size: 76692 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Inner Join 0 to 1
keys:
- 0 _col1 (type: int)
+ 0 _col0 (type: int)
1 _col0 (type: int)
- outputColumnNames: _col2, _col4
+ outputColumnNames: _col2, _col5
input vertices:
1 Map 3
- Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6724 Data size: 712744 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: _col4 (type: string), _col2 (type: double)
+ expressions: _col5 (type: string), _col2 (type: double)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6724 Data size: 712744 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 100
- Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 10600 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 100 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 100 Data size: 10600 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -196,23 +168,51 @@ STAGE PLANS:
allNative: false
usesVectorUDFAdaptor: false
vectorized: true
- Map 3
+ Map 2
Map Operator Tree:
TableScan
alias: household_demographics
- Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: hd_demo_sk is not null (type: boolean)
- Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: hd_demo_sk (type: int)
outputColumnNames: _col0
- Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: COMPLETE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ groupByVectorOutput: true
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ Map 3
+ Map Operator Tree:
+ TableScan
+ alias: store
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
+ Filter Operator
+ predicate: s_store_sk is not null (type: boolean)
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: s_store_sk (type: int), s_city (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: int)
sort order: +
Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6075 Data size: 615730 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
Map Vectorization:
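NOTE: In the vectorized_context plan above, better estimates also change the join layout: the broadcast direction flips (Edges go from "Map 2 <- Map 1, Map 3" to "Map 1 <- Map 2, Map 3"), so the fact table store_sales stays in place and both dimension tables are broadcast to it. A sketch of the query shape this plan appears to correspond to, reconstructed from the operators above rather than copied from the test file:

  SELECT s.s_city, ss.ss_net_profit
  FROM store_sales ss
  JOIN household_demographics hd ON ss.ss_hdemo_sk = hd.hd_demo_sk
  JOIN store s ON ss.ss_store_sk = s.s_store_sk
  LIMIT 100;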
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index 6000ec4..35f41ff 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -40,24 +40,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: dtest
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: a (type: int)
outputColumnNames: a
- Statistics: Num rows: 5 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: a (type: int)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: sum(_col0), count(_col0)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: bigint), _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -83,10 +83,10 @@
aggregations: sum(VALUE._col0), count(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
index 3948339..7e9b074 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
@@ -62,11 +62,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -76,10 +76,10 @@
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -90,16 +90,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -162,11 +162,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -179,10 +179,10 @@
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -193,19 +193,19 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key BETWEEN 100 AND 102 (type: boolean)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -269,11 +269,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -286,10 +286,10 @@
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 116 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 1146 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -300,17 +300,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: key BETWEEN 100 AND 102 (type: boolean)
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 1 Data size: 9 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -370,16 +370,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -387,11 +387,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -401,10 +401,10 @@
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
0 Map 1
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -465,11 +465,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -482,10 +482,10 @@
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 2
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -496,14 +496,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -572,11 +572,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -587,10 +587,10 @@
input vertices:
1 Map 2
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -601,14 +601,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -680,11 +680,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -695,10 +695,10 @@
input vertices:
1 Map 2
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -709,14 +709,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -786,11 +786,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -801,10 +801,10 @@
input vertices:
1 Map 2
residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -815,14 +815,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -888,11 +888,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Left Outer Join0 to 1
@@ -903,10 +903,10 @@
input vertices:
1 Map 2
residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -917,16 +917,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -993,14 +993,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1008,11 +1008,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -1023,10 +1023,10 @@
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1101,14 +1101,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1116,11 +1116,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -1131,10 +1131,10 @@
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1204,14 +1204,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1219,11 +1219,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -1234,10 +1234,10 @@
input vertices:
0 Map 1
residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1308,16 +1308,16 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col1 (type: int)
sort order: +
Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1325,11 +1325,11 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Map Join Operator
condition map:
Right Outer Join0 to 1
@@ -1340,10 +1340,10 @@
input vertices:
0 Map 1
residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 1528 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1411,14 +1411,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test1
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_1 (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
sort order:
- Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1426,14 +1426,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: int), value (type: int), col_2 (type: string)
outputColumnNames: _col0, _col1,
_col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1448,10 +1448,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1524,14 +1524,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1539,14 +1539,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1561,10 +1561,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1635,14 +1635,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1650,14 +1650,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1672,10 +1672,10 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 4584 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1744,16 +1744,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: test1 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs @@ -1761,16 +1761,16 @@ STAGE PLANS: Map Operator Tree: 
 TableScan
 alias: test2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
- Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1785,10 +1785,10 @@ STAGE PLANS:
 1 _col1 (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)}
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 10 Data size: 1910 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
index 147f5af..019de00 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out
@@ -141,22 +141,22 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: alltypes_parquet
- Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string)
 outputColumnNames: cint, ctinyint, csmallint, cfloat, cdouble, cstring1
- Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 12288 Data size: 1082638 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
 keys: ctinyint (type: tinyint)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 16628 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: tinyint)
 sort order: +
 Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 16628 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -181,10 +181,10 @@ STAGE PLANS:
 keys: KEY._col0 (type: tinyint)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 6144 Data size: 36864 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 95 Data size: 3328 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
index 6cd31db..da30bb5 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out
@@ -274,7 +274,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: parquet_types
- Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE
 TableScan Vectorization:
 native: true
 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
@@ -285,7 +285,7 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10]
- Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 22 Data size: 4906 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble), max(cdecimal)
 Group By Vectorization:
@@ -299,12 +299,12 @@ STAGE PLANS:
 keys: ctinyint (type: tinyint)
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: tinyint)
 sort order: +
 Map-reduce partition columns: _col0 (type: tinyint)
- Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 864 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2))
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -329,11 +329,11 @@ STAGE PLANS:
 keys: KEY._col0 (type: tinyint)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: tinyint)
 sort order: +
- Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2))
 Reducer 3
 Execution mode: vectorized, llap
@@ -352,13 +352,13 @@ STAGE PLANS:
 className: VectorSelectOperator
 native: true
 projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6]
- Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 File Sink Vectorization:
 className: VectorFileSinkOperator
 native: false
- Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 3 Data size: 444 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
index 6bd8a29..5ea456c 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out
@@ -145,14 +145,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_size (type: int), p_retailprice (type: double)
 auto parallelism: true
@@ -175,7 +175,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -196,7 +196,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -229,7 +229,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
 outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -243,13 +243,13 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int, _col7: double
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col2 (type: string), _col1 (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col5 (type: int), _col7 (type: double)
 auto parallelism: true
@@ -265,7 +265,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
 outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -299,17 +299,17 @@ STAGE PLANS:
 name: sum
 window function: GenericUDAFSumDouble
 window frame: PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -417,18 +417,18 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: p1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
 predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: p_partkey (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 0
 value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int)
 auto parallelism: true
@@ -451,7 +451,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -472,7 +472,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -497,18 +497,18 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: p2
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
 predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: p_partkey (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 1
 auto parallelism: true
 Execution mode: vectorized, llap
@@ -530,7 +530,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -551,7 +551,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -584,13 +584,13 @@ STAGE PLANS:
 1 p_partkey (type: int)
 outputColumnNames: _col1, _col2, _col5
 Position of Big Table: 0
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col2 (type: string), _col1 (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col5 (type: int)
 auto parallelism: true
@@ -606,7 +606,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -620,13 +620,13 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col2 (type: string), _col1 (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col5 (type: int)
 auto parallelism: true
@@ -642,7 +642,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -663,17 +663,17 @@ STAGE PLANS:
 window function: GenericUDAFLagEvaluator
 window frame: PRECEDING(MAX)~FOLLOWING(MAX)
 isPivotResult: true
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6467 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), (_col5 - lag_window_0) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 6583 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -773,14 +773,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_size (type: int)
 auto parallelism: true
@@ -803,7 +803,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -824,7 +824,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -857,7 +857,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -871,17 +871,17 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -986,14 +986,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_size (type: int), p_retailprice (type: double)
 auto parallelism: true
@@ -1016,7 +1016,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1037,7 +1037,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1070,7 +1070,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
 outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1084,13 +1084,13 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int, _col7: double
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col2 (type: string), _col1 (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col5 (type: int), _col7 (type: double)
 auto parallelism: true
@@ -1106,7 +1106,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
 outputColumnNames: _col1, _col2, _col5, _col7
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1140,17 +1140,17 @@ STAGE PLANS:
 name: sum
 window function: GenericUDAFSumDouble
 window frame: PRECEDING(MAX)~CURRENT
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1263,14 +1263,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_size (type: int)
 auto parallelism: true
@@ -1293,7 +1293,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1314,7 +1314,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1347,7 +1347,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1361,13 +1361,13 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col2 (type: string), _col1 (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col5 (type: int)
 auto parallelism: true
@@ -1383,7 +1383,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1418,17 +1418,17 @@ STAGE PLANS:
 window function: GenericUDAFLagEvaluator
 window frame: PRECEDING(MAX)~FOLLOWING(MAX)
 isPivotResult: true
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), (_col5 - lag_window_2) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1543,14 +1543,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_size (type: int)
 auto parallelism: true
@@ -1573,7 +1573,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1594,7 +1594,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1627,7 +1627,7 @@ STAGE PLANS:
 Select Operator
 expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1641,22 +1641,22 @@ STAGE PLANS:
 output shape: _col1: string, _col2: string, _col5: int
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int)
 outputColumnNames: _col1, _col2, _col5
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 keys: _col2 (type: string), _col1 (type: string), _col5 (type: int)
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
 null sort order: aaa
 sort order: +++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 auto parallelism: true
 Reducer 3
@@ -1672,7 +1672,7 @@ STAGE PLANS:
 keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -1707,17 +1707,17 @@ STAGE PLANS:
 window function: GenericUDAFLagEvaluator
 window frame: PRECEDING(MAX)~FOLLOWING(MAX)
 isPivotResult: true
- Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col2 (type: int), (_col2 - lag_window_2) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 3107 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 13 Data size: 3107 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -1825,14 +1825,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
 auto parallelism: true
@@ -1855,7 +1855,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1876,7 +1876,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1901,18 +1901,18 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: p1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
 predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: p_partkey (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 1
 auto parallelism: true
 Execution mode: vectorized, llap
@@ -1934,7 +1934,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1955,7 +1955,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -1988,7 +1988,7 @@ STAGE PLANS:
 Select Operator
 expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -2002,17 +2002,17 @@ STAGE PLANS:
 output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 isSamplingPred: false
 predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 0
 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 auto parallelism: true
@@ -2028,13 +2028,13 @@ STAGE PLANS:
 1 p_partkey (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
 Position of Big Table: 0
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2137,18 +2137,18 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: p1
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Filter Operator
 isSamplingPred: false
 predicate: p_partkey is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: p_partkey (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: p_partkey (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 0
 auto parallelism: true
 Execution mode: vectorized, llap
@@ -2170,7 +2170,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -2191,7 +2191,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -2216,14 +2216,14 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string)
 null sort order: aa
 sort order: ++
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string)
 auto parallelism: true
@@ -2246,7 +2246,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -2267,7 +2267,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -2300,17 +2300,17 @@ STAGE PLANS:
 1 _col0 (type: int)
 outputColumnNames: _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
 Position of Big Table: 1
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col12 (type: int), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: int), _col18 (type: string), _col19 (type: double), _col20 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
#### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 29 Data size: 17951 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -2339,7 +2339,7 @@ STAGE PLANS:
 Select Operator
 expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: double), VALUE._col6 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 PTF Operator
 Function definitions:
 Input definition
@@ -2353,17 +2353,17 @@ STAGE PLANS:
 output shape: _col0: int, _col1: string, _col2: string, _col3: string, _col4: string, _col5: int, _col6: string, _col7: double, _col8: string
 partition by: _col2
 raw input shape:
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 isSamplingPred: false
 predicate: _col0 is not null (type: boolean)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: int)
 null sort order: a
 sort order: +
 Map-reduce partition columns: _col0 (type: int)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 23062 Basic stats: COMPLETE Column stats: COMPLETE
 tag: 1
 value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string)
 auto parallelism: true
@@ -2453,7 +2453,7 @@ STAGE PLANS:
 Map Operator Tree:
 TableScan
 alias: part_orc
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 GatherStats: false
 PTF Operator
 Function definitions:
@@ -2469,14 +2469,14 @@ STAGE PLANS:
 partition by: p_mfgr
 raw input shape:
 transforms raw input: true
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 Map-side function: true
 Reduce Output Operator
 key expressions: p_mfgr (type: string), p_name (type: string), p_size (type: int)
 null sort order: aaz
 sort order: ++-
 Map-reduce partition columns: p_mfgr (type: string)
- Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 auto parallelism: true
 Execution mode: llap
@@ -2496,7 +2496,7 @@ STAGE PLANS:
 input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
 properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
 bucket_count -1
 column.name.delimiter ,
 columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -2517,7 +2517,7 @@ STAGE PLANS:
 input format:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2550,7 +2550,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2565,13 +2565,13 @@ STAGE PLANS: partition by: _col2 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int) null sort order: aaz sort order: ++- Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Reducer 3 @@ -2586,7 +2586,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey2 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2607,17 +2607,17 @@ STAGE PLANS: window function: GenericUDAFRankEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2722,7 +2722,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false PTF Operator Function definitions: @@ -2738,14 +2738,14 @@ STAGE PLANS: partition by: p_mfgr raw input shape: transforms raw input: true - Statistics: Num rows: 26 
Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: true @@ -2766,7 +2766,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2787,7 +2787,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2820,7 +2820,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2835,13 +2835,13 @@ STAGE PLANS: partition by: _col2 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -2857,7 +2857,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -2891,17 +2891,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3010,14 +3010,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: true @@ -3040,7 +3040,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3061,7 +3061,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3094,7 +3094,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3108,13 +3108,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 
12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -3130,7 +3130,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3164,17 +3164,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3286,14 +3286,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: true @@ -3316,7 +3316,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3337,7 +3337,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3370,7 +3370,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3384,7 +3384,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3406,14 +3406,14 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -3429,7 +3429,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3451,13 +3451,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -3473,7 +3473,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 
Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3507,17 +3507,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3636,14 +3636,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int), p_retailprice (type: double) auto parallelism: true @@ -3666,7 +3666,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3687,7 +3687,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3720,7 +3720,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3734,13 
+3734,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -3756,7 +3756,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -3782,17 +3782,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~FOLLOWING(2) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), count_window_0 (type: bigint), round(sum_window_1, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -3916,14 +3916,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) auto parallelism: true @@ -3946,7 +3946,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns 
p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3967,7 +3967,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3992,18 +3992,18 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false predicate: p_partkey is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: p_partkey (type: int) null sort order: a sort order: + Map-reduce partition columns: p_partkey (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE tag: 1 auto parallelism: true Execution mode: vectorized, llap @@ -4025,7 +4025,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4046,7 +4046,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4079,7 +4079,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col0, _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4093,17 +4093,17 @@ STAGE PLANS: output shape: _col0: int, _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE Filter 
Operator isSamplingPred: false predicate: _col0 is not null (type: boolean) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: a sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 13078 Basic stats: COMPLETE Column stats: COMPLETE tag: 0 value expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -4119,13 +4119,13 @@ STAGE PLANS: 1 p_partkey (type: int) outputColumnNames: _col1, _col2, _col5, _col7 Position of Big Table: 0 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 6699 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -4141,7 +4141,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4188,17 +4188,17 @@ STAGE PLANS: window function: GenericUDAFLagEvaluator window frame: PRECEDING(MAX)~FOLLOWING(MAX) isPivotResult: true - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 22243 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), count_window_2 (type: bigint), _col7 (type: double), round(sum_window_3, 2) (type: double), _col5 (type: int), (_col5 - lag_window_4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 28 Data size: 17646 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 29 Data size: 7511 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4309,14 +4309,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition 
columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int) auto parallelism: true @@ -4339,7 +4339,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4360,7 +4360,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4393,7 +4393,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4407,22 +4407,22 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: string), _col2 (type: string), _col5 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col2 (type: string), _col1 (type: string), _col5 (type: int) mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) null sort order: aaa sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 auto parallelism: true Reducer 3 @@ -4440,13 +4440,13 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: 
COMPLETE
              File Output Operator
                compressed: false
                GlobalTableId: 0
#### A masked pattern was here ####
                NumFilesPerFileSink: 1
-               Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 13 Data size: 2899 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
                table:
                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -4567,24 +4567,24 @@ STAGE PLANS:
        Map Operator Tree:
            TableScan
              alias: part_orc
-             Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+             Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
              GatherStats: false
              Select Operator
                expressions: p_mfgr (type: string), p_brand (type: string), p_retailprice (type: double)
                outputColumnNames: p_mfgr, p_brand, p_retailprice
-               Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 26 Data size: 5148 Basic stats: COMPLETE Column stats: COMPLETE
                Group By Operator
                  aggregations: sum(p_retailprice)
                  keys: p_mfgr (type: string), p_brand (type: string)
                  mode: hash
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
                  Reduce Output Operator
                    key expressions: _col0 (type: string), _col1 (type: string)
                    null sort order: aa
                    sort order: ++
                    Map-reduce partition columns: _col0 (type: string)
-                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
+                   Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
                    tag: -1
                    value expressions: _col2 (type: double)
                    auto parallelism: true
@@ -4607,7 +4607,7 @@ STAGE PLANS:
                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                  properties:
-                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
                    bucket_count -1
                    column.name.delimiter ,
                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -4628,7 +4628,7 @@ STAGE PLANS:
                  input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                  output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                  properties:
-                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                   COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}}
                    bucket_count -1
                    column.name.delimiter ,
                    columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment
@@ -4663,11 +4663,11 @@ STAGE PLANS:
                keys: KEY._col0 (type: string), KEY._col1 (type: string)
                mode: mergepartial
                outputColumnNames: _col0, _col1, _col2
-               Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+               Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
                  expressions: _col0 (type: string), _col1 (type: string), round(_col2, 2) (type: double)
                  outputColumnNames: _col0, _col1, _col2
-                 Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE
+                 Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE
                  PTF Operator
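Note on the Group By estimates above: with Column stats: COMPLETE the planner derives Num rows from the distinct-value counts of the grouping keys (p_mfgr, p_brand) and Data size from per-column average widths, which is why the estimate drops from 26 rows / 16042 bytes to 13 rows / 2574 bytes. The statistics feeding these estimates can be inspected directly; the snippet below is illustrative only and is not part of the golden output (it assumes a Hive session against the same test warehouse):

-- Show the column statistics (distinct count, avg/max length, min/max)
-- behind the Num rows / Data size figures in the plans above.
DESCRIBE FORMATTED part_orc p_mfgr;
DESCRIBE FORMATTED part_orc p_retailprice;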
Function definitions: Input definition @@ -4681,13 +4681,13 @@ STAGE PLANS: output shape: _col0: string, _col1: string, _col2: double partition by: _col0 raw input shape: - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: double) auto parallelism: true @@ -4703,7 +4703,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: double) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4723,17 +4723,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(2)~CURRENT - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), _col2 (type: double), round(sum_window_0, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 13 Data size: 8021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 13 Data size: 2678 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -4878,6 +4878,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -4888,22 +4890,24 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Reducer 2 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 2 (SIMPLE_EDGE) + Reducer 6 <- Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size 
(type: int), p_retailprice (type: double) auto parallelism: true @@ -4926,7 +4930,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4947,7 +4951,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4980,7 +4984,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -4994,13 +4998,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int, _col7: double partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: true @@ -5009,7 +5013,7 @@ STAGE PLANS: null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string) auto parallelism: true @@ -5025,7 +5029,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double) outputColumnNames: _col1, _col2, _col5, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5059,17 +5063,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumDouble window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: _col2 (type: string), _col1 (type: string), _col5 (type: int), rank_window_0 (type: int), dense_rank_window_1 (type: int), round(sum_window_2, 2) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5096,19 +5100,71 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + auto parallelism: false Reducer 4 Execution mode: llap Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2880 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types struct:struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 + Execution mode: llap + Needs Tagging: false + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true notVectorizedReason: PTF Operator (PTF) not supported vectorized: false Reduce Operator Tree: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5128,21 +5184,21 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(5)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: sum_window_0 (type: bigint), _col1 (type: string), _col2 (type: string), _col5 (type: int) outputColumnNames: sum_window_0, _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: true - Reducer 5 + Reducer 6 Execution mode: llap Needs Tagging: false Reduce Vectorization: @@ -5154,7 +5210,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col0, _col2, _col3, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5195,17 +5251,17 @@ STAGE PLANS: name: first_value window function: GenericUDAFFirstValueEvaluator window frame: PRECEDING(2)~FOLLOWING(2) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12974 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: string), _col2 (type: string), _col6 (type: int), UDFToInteger(round(_col0, 1)) (type: int), rank_window_1 (type: int), dense_rank_window_2 (type: int), cume_dist_window_3 (type: double), first_value_window_4 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 2 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -5232,6 +5288,58 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + 
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: false + Reducer 7 + Execution mode: llap + Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-3 Dependency Collection @@ -5268,6 +5376,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + Is Table Level Stats: true + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -5474,14 +5598,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string) null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_name (type: string), p_size (type: int) auto parallelism: true @@ -5504,7 +5628,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5525,7 +5649,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5558,7 +5682,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5579,7 +5703,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5601,14 +5725,14 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -5624,7 +5748,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 
Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5646,13 +5770,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -5668,7 +5792,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5702,17 +5826,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -5855,14 +5979,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string) null sort order: a sort order: + Map-reduce partition columns: p_mfgr (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_name (type: string), p_size (type: int) auto parallelism: true @@ -5885,7 +6009,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns 
p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5906,7 +6030,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5939,7 +6063,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5960,13 +6084,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -5982,7 +6106,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -5996,13 +6120,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: true @@ -6018,7 +6142,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6032,13 +6156,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: 
COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -6054,7 +6178,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6088,17 +6212,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -6236,14 +6360,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int) auto parallelism: true @@ -6266,7 +6390,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6287,7 +6411,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6320,7 +6444,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6341,13 +6465,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: true @@ -6363,7 +6487,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6384,13 +6508,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -6406,7 +6530,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6440,17 +6564,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -6589,14 +6713,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int) auto parallelism: true @@ -6619,7 +6743,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6640,7 +6764,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6673,7 +6797,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6694,13 +6818,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 
value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: true @@ -6716,7 +6840,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6730,7 +6854,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6745,14 +6869,14 @@ STAGE PLANS: partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -6768,7 +6892,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6783,13 +6907,13 @@ STAGE PLANS: partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -6805,7 +6929,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -6839,17 +6963,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), 
dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6214 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -6989,14 +7113,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int) auto parallelism: true @@ -7019,7 +7143,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7040,7 +7164,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7073,7 +7197,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7087,7 +7211,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7109,14 +7233,14 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE 
Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string) null sort order: a sort order: + Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col1 (type: string), _col5 (type: int) auto parallelism: true @@ -7132,7 +7256,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7154,13 +7278,13 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -7176,7 +7300,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7210,17 +7334,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -7354,14 +7478,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: part_orc - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Reduce Output Operator key 
expressions: p_mfgr (type: string), p_name (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: p_size (type: int) auto parallelism: true @@ -7384,7 +7508,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7405,7 +7529,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7438,7 +7562,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7459,7 +7583,7 @@ STAGE PLANS: output shape: _col1: string, _col2: string, _col5: int partition by: _col2, _col1 raw input shape: - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7474,14 +7598,14 @@ STAGE PLANS: partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Map-side function: true Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string), _col1 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -7497,7 +7621,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7512,13 +7636,13 @@ STAGE PLANS: 
partition by: _col2, _col1 raw input shape: transforms raw input: true - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: string) null sort order: aa sort order: ++ Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col5 (type: int) auto parallelism: true @@ -7534,7 +7658,7 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int) outputColumnNames: _col1, _col2, _col5 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE PTF Operator Function definitions: Input definition @@ -7568,17 +7692,17 @@ STAGE PLANS: name: sum window function: GenericUDAFSumLong window frame: PRECEDING(MAX)~CURRENT - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: string), _col1 (type: string), rank_window_0 (type: int), dense_rank_window_1 (type: int), _col5 (type: int), sum_window_2 (type: bigint), sum_window_2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 6422 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index d224bad..1e592ee 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -34,7 +34,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["_col0"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY @@ -63,7 +63,7 @@ Stage-0 Stage-1 Reducer 2 llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=1 width=80) + Select Operator [SEL_5] (rows=1 width=92) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=1 width=80) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] @@ -74,7 +74,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["ts"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY @@ -104,7 +104,7 @@ Stage-0 Filter Operator [FIL_4] (rows=1 width=40) 
predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY @@ -132,7 +132,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["_col0"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test PREHOOK: type: QUERY @@ -161,7 +161,7 @@ Stage-0 Stage-1 Reducer 2 vectorized, llap File Output Operator [FS_6] - Select Operator [SEL_5] (rows=1 width=80) + Select Operator [SEL_5] (rows=1 width=92) Output:["_col0","_col1","_col2"] Group By Operator [GBY_4] (rows=1 width=80) Output:["_col0","_col1"],aggregations:["min(VALUE._col0)","max(VALUE._col1)"] @@ -172,7 +172,7 @@ Stage-0 Select Operator [SEL_1] (rows=2 width=40) Output:["ts"] TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT MIN(ts), MAX(ts), MAX(ts) - MIN(ts) FROM test PREHOOK: type: QUERY @@ -202,7 +202,7 @@ Stage-0 Filter Operator [FIL_4] (rows=1 width=40) predicate:(ts) IN (0001-01-01 00:00:00.0, 0002-02-02 00:00:00.0) TableScan [TS_0] (rows=2 width=40) - default@test,test,Tbl:COMPLETE,Col:NONE,Output:["ts"] + default@test,test,Tbl:COMPLETE,Col:COMPLETE,Output:["ts"] PREHOOK: query: SELECT ts FROM test WHERE ts IN (timestamp '0001-01-01 00:00:00.000000000', timestamp '0002-02-02 00:00:00.000000000') PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index ed509de..5f3a435 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -109,7 +109,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -121,7 +121,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFYearTimestamp(col 0, field YEAR) -> 3:long, VectorUDFMonthTimestamp(col 0, field MONTH) -> 4:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 5:long, VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 6:long, VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 7:long, VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 8:long, VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 9:long, VectorUDFSecondTimestamp(col 0, field SECOND) -> 10:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -129,7 +129,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -158,13 +158,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -292,7 +292,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -304,7 +304,7 @@ STAGE PLANS: native: true projectedOutputColumns: [2, 3, 4, 5, 6, 7, 8, 9, 10] selectExpressions: VectorUDFUnixTimeStampString(col 1) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 6:long, VectorUDFWeekOfYearString(col 1) -> 7:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 8:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 9:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 10:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -312,7 +312,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -341,13 +341,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -475,7 +475,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1684 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -487,7 +487,7 @@ STAGE PLANS: native: true projectedOutputColumns: [4, 5, 6, 7, 8, 9, 10, 11, 12] selectExpressions: LongColEqualLongColumn(col 2, col 3)(children: VectorUDFUnixTimeStampTimestamp(col 0) -> 2:long, VectorUDFUnixTimeStampString(col 1) -> 3:long) -> 4:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFYearTimestamp(col 0, field YEAR) -> 2:long, VectorUDFYearString(col 1, fieldStart 0, fieldLength 4) -> 3:long) -> 5:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMonthTimestamp(col 0, field MONTH) -> 2:long, VectorUDFMonthString(col 1, fieldStart 5, fieldLength 2) -> 3:long) -> 6:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 7:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFDayOfMonthTimestamp(col 0, field DAY_OF_MONTH) -> 2:long, VectorUDFDayOfMonthString(col 1, fieldStart 8, fieldLength 2) -> 3:long) -> 8:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFWeekOfYearTimestamp(col 0, field WEEK_OF_YEAR) -> 2:long, VectorUDFWeekOfYearString(col 1) -> 3:long) -> 9:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFHourTimestamp(col 0, field HOUR_OF_DAY) -> 2:long, VectorUDFHourString(col 1, fieldStart 11, fieldLength 2) -> 3:long) -> 10:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFMinuteTimestamp(col 0, field MINUTE) -> 2:long, VectorUDFMinuteString(col 1, fieldStart 14, fieldLength 2) -> 3:long) -> 11:long, LongColEqualLongColumn(col 2, col 3)(children: VectorUDFSecondTimestamp(col 0, field SECOND) -> 2:long, VectorUDFSecondString(col 1, fieldStart 17, fieldLength 2) -> 3:long) -> 12:long - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) sort order: + @@ -495,7 +495,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -524,13 +524,13 @@ STAGE 
PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1440 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -658,7 +658,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_wrong - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 309 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0] @@ -670,7 +670,7 @@ STAGE PLANS: native: true projectedOutputColumns: [1, 2, 3, 4, 5, 6, 7, 8, 9] selectExpressions: VectorUDFUnixTimeStampString(col 0) -> 1:long, VectorUDFYearString(col 0, fieldStart 0, fieldLength 4) -> 2:long, VectorUDFMonthString(col 0, fieldStart 5, fieldLength 2) -> 3:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 4:long, VectorUDFDayOfMonthString(col 0, fieldStart 8, fieldLength 2) -> 5:long, VectorUDFWeekOfYearString(col 0) -> 6:long, VectorUDFHourString(col 0, fieldStart 11, fieldLength 2) -> 7:long, VectorUDFMinuteString(col 0, fieldStart 14, fieldLength 2) -> 8:long, VectorUDFSecondString(col 0, fieldStart 17, fieldLength 2) -> 9:long - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -678,7 +678,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int) Execution mode: vectorized, llap LLAP IO: all inputs @@ -707,13 +707,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8] - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -792,7 +792,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic 
stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -803,7 +803,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: @@ -814,14 +814,14 @@ STAGE PLANS: projectedOutputColumns: [0, 1, 2, 3] mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -853,13 +853,13 @@ STAGE PLANS: projectedOutputColumns: [0, 1, 2, 3] mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -918,19 +918,19 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp) outputColumnNames: ctimestamp1 - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: double) Execution mode: llap LLAP IO: all inputs @@ -960,7 +960,7 @@ STAGE PLANS: projectedOutputColumns: [0] mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 3) (type: double) outputColumnNames: _col0 @@ -969,13 +969,13 @@ STAGE PLANS: native: true projectedOutputColumns: [1] selectExpressions: RoundWithNumDigitsDoubleToDouble(col 0, decimalPlaces 3) -> 1:double - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1042,7 +1042,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1] @@ -1053,7 +1053,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0] - Statistics: Num rows: 40 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 40 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) Group By Vectorization: @@ -1065,10 +1065,10 @@ STAGE PLANS: vectorOutputConditionsNotMet: Vector output of VectorUDAFAvgTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFVarSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdPopTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false, Vector output of VectorUDAFStdSampTimestamp(col 0) -> struct output type STRUCT requires PRIMITIVE IS false mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1092,14 +1092,14 @@ STAGE PLANS: aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), 
stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: round(_col0, 0) (type: double), _col1 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col2 BETWEEN 8.97077295279421E19 AND 8.97077295279422E19 (type: boolean), _col3 BETWEEN 9.20684592523616E19 AND 9.20684592523617E19 (type: boolean), round(_col4, 3) (type: double), round(_col5, 3) (type: double), round(_col6, 3) (type: double), round(_col7, 3) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 672 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 52 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap_acid.q.out b/ql/src/test/results/clientpositive/llap_acid.q.out index 5970fd7..e04af0e 100644 --- a/ql/src/test/results/clientpositive/llap_acid.q.out +++ b/ql/src/test/results/clientpositive/llap_acid.q.out @@ -91,18 +91,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -110,10 +110,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -141,24 +141,26 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: 
default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 -1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 @@ -216,18 +218,18 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: PARTIAL Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: PARTIAL Column stats: NONE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: cint (type: int), csmallint (type: smallint), cbigint (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: int) sort order: ++ - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized LLAP IO: may be used (ACID table) @@ -235,10 +237,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: int), KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 296 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 20 Data size: 320 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -266,25 +268,27 @@ POSTHOOK: Input: default@orc_llap@csmallint=1 POSTHOOK: Input: default@orc_llap@csmallint=2 POSTHOOK: Input: default@orc_llap@csmallint=3 #### A masked pattern was here #### --285355633 1 -1241163445 --109813638 1 -58941842 +-970918963 1 -588508542 +-734267047 1 895807844 +-548534304 1 -1900081338 +-546972460 1 665899329 +-469581869 1 1033373031 +-101217409 1 183045850 +-37908611 1 -1378658304 1 1 2 -164554497 1 1161977292 -199879534 1 123351087 -246423894 1 -1645852809 -354670578 1 562841852 -455419170 1 1108177470 -665801232 1 480783141 -708885482 1 
-1645852809 --285355633 2 -1241163445 --109813638 2 -58941842 -164554497 2 1161977292 -199879534 2 123351087 -246423894 2 -1645852809 -354670578 2 562841852 -455419170 2 1108177470 -665801232 2 480783141 -708885482 2 -1645852809 +274816197 1 -437339127 +371876492 1 1862746855 +927956889 1 -935575737 +-970918963 2 -588508542 +-734267047 2 895807844 +-548534304 2 -1900081338 +-546972460 2 665899329 +-469581869 2 1033373031 +-101217409 2 183045850 +-37908611 2 -1378658304 +274816197 2 -437339127 +371876492 2 1862746855 +927956889 2 -935575737 -923308739 3 -1887561756 -3728 3 -1887561756 762 3 -1645852809 diff --git a/ql/src/test/results/clientpositive/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/load_dyn_part1.q.out index 84d806d..01f9446 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part1.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part1.q.out @@ -58,16 +58,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -91,6 +89,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -106,6 +120,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -132,6 +180,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -162,15 +224,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -191,31 +244,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/load_dyn_part10.q.out index 99d3572..bbfa0bb 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part10.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part10.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part10 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part10 + PREHOOK: query: from srcpart insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/load_dyn_part13.q.out index 9e0ac6f..742dcf6 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part13.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part13.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: 
Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -85,6 +86,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -105,6 +122,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part13 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -122,6 +173,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part13 + PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr) 
select * from ( select key, value, '22' diff --git a/ql/src/test/results/clientpositive/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/load_dyn_part14.q.out index a6a5c63..bfc0c0e 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part14.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part14.q.out @@ -50,6 +50,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -71,7 +72,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -101,6 +101,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -112,6 +128,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: struct) TableScan Union Statistics: Num rows: 6 Data size: 938 Basic stats: COMPLETE Column stats: COMPLETE @@ -123,6 +155,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part14 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + keys: value (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE + 
value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -148,6 +214,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.nzhang_part14 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -192,7 +265,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 @@ -222,7 +294,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 Reduce Operator Tree: Limit Number of rows: 2 diff --git a/ql/src/test/results/clientpositive/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/load_dyn_part2.q.out index 93778a2..e80a262 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part2.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part2.q.out @@ -38,6 +38,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -68,6 +70,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -85,6 +103,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part_bucket + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 
(type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr) select key, value, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/load_dyn_part3.q.out index 3849100..4542fe0 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part3.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part3.q.out @@ -44,6 +44,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -64,6 +65,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part3 + PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/load_dyn_part4.q.out index 40b0bbb..6c36346 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part4.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part4.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part4 + PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git 
a/ql/src/test/results/clientpositive/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/load_dyn_part8.q.out index cb1a757..681d962 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part8.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part8.q.out @@ -48,8 +48,11 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-4, Stage-5 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -96,6 +99,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (ds > '2008-04-08') (type: boolean) @@ -134,6 +156,34 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -334,6 +384,40 @@ STAGE PLANS: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=11 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: 
struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -367,6 +451,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part8 + Is Table Level Stats: false + Stage: Stage-1 Move Operator tables: @@ -399,6 +499,82 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + null sort order: aa + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10004 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2,_col3 + columns.types string,string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: 
struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:string:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: from srcpart insert overwrite table nzhang_part8 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' insert overwrite table nzhang_part8 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08' diff --git a/ql/src/test/results/clientpositive/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/load_dyn_part9.q.out index 414e784..8955bf5 100644 --- a/ql/src/test/results/clientpositive/load_dyn_part9.q.out +++ b/ql/src/test/results/clientpositive/load_dyn_part9.q.out @@ -46,6 +46,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -66,6 +67,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -83,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part9 + PREHOOK: query: from srcpart insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/mapjoin_hook.q.out b/ql/src/test/results/clientpositive/mapjoin_hook.q.out index a9f9be3..bd59091 100644 --- a/ql/src/test/results/clientpositive/mapjoin_hook.q.out +++ b/ql/src/test/results/clientpositive/mapjoin_hook.q.out @@ -13,20 +13,24 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL +RUN: Stage-7:MAPREDLOCAL RUN: Stage-2:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@dest1 [MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 1 HINTED_MAPJOIN_LOCAL: 1 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 0 BACKUP_COMMON_JOIN: 0 -RUN: Stage-6:MAPREDLOCAL -RUN: Stage-5:MAPRED +RUN: Stage-7:MAPREDLOCAL +RUN: Stage-6:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') @@ -38,14 +42,14 @@ PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 PREHOOK: Output: default@dest1 -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 1 -RUN: Stage-6:CONDITIONAL -RUN: Stage-7:MAPREDLOCAL -RUN: Stage-1:MAPRED +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 1 BACKUP_COMMON_JOIN: 0 +RUN: Stage-7:CONDITIONAL +RUN: Stage-8:MAPREDLOCAL +RUN: Stage-5:MAPRED RUN: Stage-0:MOVE +RUN: Stage-3:MAPRED RUN: Stage-2:STATS +RUN: Stage-10:COLUMNSTATS PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY @@ -53,14 +57,14 @@ PREHOOK: Input: default@src PREHOOK: Output: default@dest1 FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask -ATTEMPT: Execute 
BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask -[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 0 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 2 -RUN: Stage-11:CONDITIONAL -RUN: Stage-14:MAPREDLOCAL +[MapJoinCounter PostHook] COMMON_JOIN: 0 HINTED_MAPJOIN: 0 HINTED_MAPJOIN_LOCAL: 0 CONVERTED_MAPJOIN: 1 CONVERTED_MAPJOIN_LOCAL: 2 BACKUP_COMMON_JOIN: 1 +RUN: Stage-12:CONDITIONAL +RUN: Stage-15:MAPREDLOCAL RUN: Stage-1:MAPRED -RUN: Stage-8:CONDITIONAL -RUN: Stage-12:MAPREDLOCAL -RUN: Stage-2:MAPRED +RUN: Stage-9:CONDITIONAL +RUN: Stage-13:MAPREDLOCAL +RUN: Stage-7:MAPRED RUN: Stage-0:MOVE +RUN: Stage-4:MAPRED RUN: Stage-3:STATS +RUN: Stage-17:COLUMNSTATS diff --git a/ql/src/test/results/clientpositive/mapreduce1.q.out b/ql/src/test/results/clientpositive/mapreduce1.q.out index 3d0a156..2660486 100644 --- a/ql/src/test/results/clientpositive/mapreduce1.q.out +++ b/ql/src/test/results/clientpositive/mapreduce1.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git 
a/ql/src/test/results/clientpositive/mapreduce2.q.out b/ql/src/test/results/clientpositive/mapreduce2.q.out index 676c387..25469c4 100644 --- a/ql/src/test/results/clientpositive/mapreduce2.q.out +++ b/ql/src/test/results/clientpositive/mapreduce2.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce3.q.out b/ql/src/test/results/clientpositive/mapreduce3.q.out index fc1a402..0d80be1 100644 --- a/ql/src/test/results/clientpositive/mapreduce3.q.out +++ b/ql/src/test/results/clientpositive/mapreduce3.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), 
_col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce4.q.out b/ql/src/test/results/clientpositive/mapreduce4.q.out index 17fa029..8d5182a 100644 --- a/ql/src/test/results/clientpositive/mapreduce4.q.out +++ b/ql/src/test/results/clientpositive/mapreduce4.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE 
PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce5.q.out b/ql/src/test/results/clientpositive/mapreduce5.q.out index 21103f8..0bdae3d 100644 --- a/ql/src/test/results/clientpositive/mapreduce5.q.out +++ b/ql/src/test/results/clientpositive/mapreduce5.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 
Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key as c1, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value as c4 diff --git a/ql/src/test/results/clientpositive/mapreduce6.q.out b/ql/src/test/results/clientpositive/mapreduce6.q.out index fe4e631..df1a9c6 100644 --- a/ql/src/test/results/clientpositive/mapreduce6.q.out +++ b/ql/src/test/results/clientpositive/mapreduce6.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -55,6 +57,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string) + outputColumnNames: key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -69,6 +86,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, ten, one, value + Column Types: int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, CAST(src.key / 10 AS INT) as c2, CAST(src.key % 10 AS INT) as c3, src.value diff --git a/ql/src/test/results/clientpositive/mapreduce7.q.out b/ql/src/test/results/clientpositive/mapreduce7.q.out index cc97887..4de831a 100644 --- 
a/ql/src/test/results/clientpositive/mapreduce7.q.out +++ b/ql/src/test/results/clientpositive/mapreduce7.q.out @@ -24,6 +24,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -61,6 +63,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -75,6 +92,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/mapreduce8.q.out b/ql/src/test/results/clientpositive/mapreduce8.q.out index b1763c7..7a57081 100644 --- a/ql/src/test/results/clientpositive/mapreduce8.q.out +++ b/ql/src/test/results/clientpositive/mapreduce8.q.out @@ -26,6 +26,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: string) + outputColumnNames: k, v, key, ten, one, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k, 16), compute_stats(v, 16), compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k, v, key, ten, one, value + Column Types: string, string, int, int, int, string + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 MAP src.*, src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value diff --git a/ql/src/test/results/clientpositive/merge1.q.out b/ql/src/test/results/clientpositive/merge1.q.out index 2487bf7..8e38bd8 100644 --- a/ql/src/test/results/clientpositive/merge1.q.out +++ b/ql/src/test/results/clientpositive/merge1.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE 
+ File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table dest1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 
@@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge2.q.out b/ql/src/test/results/clientpositive/merge2.q.out index a8b4bd5..90fe0f8 100644 --- a/ql/src/test/results/clientpositive/merge2.q.out +++ b/ql/src/test/results/clientpositive/merge2.q.out @@ -20,9 +20,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -66,6 +68,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -89,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -119,6 +143,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table test1 select key, count(1) from src group by key PREHOOK: type: QUERY @@ -504,6 +550,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -527,6 +574,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -550,6 +623,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -605,6 +685,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -628,6 +709,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -651,6 +758,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge3.q.out b/ql/src/test/results/clientpositive/merge3.q.out index 1eb5f0e..86b976f 100644 --- a/ql/src/test/results/clientpositive/merge3.q.out +++ b/ql/src/test/results/clientpositive/merge3.q.out @@ -2376,6 +2376,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2421,6 +2422,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2432,7 +2452,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2479,7 +2499,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2521,6 +2541,40 @@ STAGE PLANS: Truncated Path -> Alias: /merge_src_part/ds=2008-04-08 [merge_src_part] /merge_src_part/ds=2008-04-09 [merge_src_part] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -2562,6 +2616,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -4797,6 +4859,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -4814,8 +4877,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: + key expressions: _col2 (type: string) + null sort order: a + sort order: + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE tag: -1 @@ -4832,7 +4896,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4879,7 +4943,7 @@ STAGE PLANS: partition values: ds 2008-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4956,6 +5020,42 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -4997,6 +5097,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_src_part2 + Is Table Level Stats: false + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge4.q.out b/ql/src/test/results/clientpositive/merge4.q.out index 182c6a8..c73150d 100644 --- a/ql/src/test/results/clientpositive/merge4.q.out +++ b/ql/src/test/results/clientpositive/merge4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -41,6 +42,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1138,6 +1180,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1161,6 +1204,40 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1187,6 +1264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2763,9 +2847,11 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-2, Stage-8 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -2786,7 +2872,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) TableScan alias: src @@ -2803,7 +2888,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 1 Data size: 259 Basic stats: COMPLETE Column stats: COMPLETE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) Reduce Operator Tree: Select Operator @@ -2821,6 +2905,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2010-08-15' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1 Data size: 353 Basic stats: COMPLETE Column stats: 
COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-7 Conditional Operator @@ -2847,6 +2947,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2877,6 +2984,35 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 1167 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1143 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table nzhang_part partition (ds='2010-08-15', hr) select * from ( select key, value, hr from srcpart where ds='2008-04-08' diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out index a777fe0..a3ec1e1 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition.q.out @@ -57,6 +57,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -77,6 +78,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +129,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + PREHOOK: query: insert overwrite table merge_dynamic_part partition (ds='2008-04-08', hr) select key, value, hr from srcpart_merge_dp where ds='2008-04-08' PREHOOK: type: QUERY PREHOOK: Input: default@srcpart_merge_dp @@ -645,6 +687,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -668,6 +711,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -694,6 +771,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -1275,6 +1359,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1298,6 +1383,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 29 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 14 Data size: 2805 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -1324,6 +1443,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out index 5a2afb0..e68cd55 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition2.q.out @@ -76,6 
+76,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -99,6 +100,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 43 Data size: 8607 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -125,6 +160,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out index 055e07a..5df6d4a 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition3.q.out @@ -136,6 +136,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -159,6 +160,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 
16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 174 Data size: 34830 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 87 Data size: 17415 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -185,6 +220,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index cbeaf42..4d70e64 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -160,6 +161,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 9624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: 
_col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -186,6 +221,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 5a562f4..347bd04 100644 --- a/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ b/ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -113,6 +113,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +137,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 618 Data size: 5934 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 309 Data size: 2967 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator 
@@ -162,6 +197,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.merge_dynamic_part + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/mergejoin.q.out b/ql/src/test/results/clientpositive/mergejoin.q.out index 025de3c..b17ec84 100644 --- a/ql/src/test/results/clientpositive/mergejoin.q.out +++ b/ql/src/test/results/clientpositive/mergejoin.q.out @@ -1659,36 +1659,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -1701,34 +1701,34 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: 
boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash @@ -2195,36 +2195,36 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 242 Data size: 22748 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) TableScan - alias: b - filterExpr: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + alias: c + filterExpr: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + predicate: value is not null (type: boolean) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) + expressions: value (type: string) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col0 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -2237,34 +2237,34 @@ STAGE PLANS: Map Operator Tree: TableScan 
Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 266 Data size: 25022 Basic stats: COMPLETE Column stats: NONE TableScan - alias: c - filterExpr: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + alias: b + filterExpr: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: value (type: string) + expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 47000 Basic stats: COMPLETE Column stats: NONE Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - Statistics: Num rows: 605 Data size: 56870 Basic stats: COMPLETE Column stats: NONE + 0 _col0 (type: int) + 1 _col0 (type: int) + Statistics: Num rows: 550 Data size: 51700 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() mode: hash diff --git a/ql/src/test/results/clientpositive/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/metadata_only_queries.q.out index 82b4a41..ba5418e 100644 --- a/ql/src/test/results/clientpositive/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/metadata_only_queries.q.out @@ -183,46 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: 
NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -233,46 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -283,50 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, 
count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -337,50 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 
978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/multi_insert_gby.q.out index cb97e58..09fb2ee 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby.q.out @@ -30,8 +30,12 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -72,6 +76,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 500) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -93,6 +112,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -107,6 +141,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -117,9 +187,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key @@ -211,8 +303,12 @@ STAGE DEPENDENCIES: Stage-2 
is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6 + Stage-4 depends on stages: Stage-2 Stage-0 depends on stages: Stage-2 - Stage-4 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -251,6 +347,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 > 450) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -272,6 +383,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move Operator @@ -286,6 +412,42 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-0 Move Operator tables: @@ -296,9 +458,31 @@ STAGE PLANS: serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 - Stage: Stage-4 + Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key diff --git a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out index 476dfa7..d44080c 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby2.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby2.q.out @@ -33,6 +33,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -92,6 +94,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: count + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(count, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Group By Operator aggregations: percentile_approx(VALUE._col0, 0.5) mode: complete @@ -105,6 +127,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: double) + outputColumnNames: percentile + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(percentile, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,6 +161,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: count + Column Types: int + Table: default.e1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: percentile + Column Types: double + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out index 32aec10..424bc66 100644 --- a/ql/src/test/results/clientpositive/multi_insert_gby3.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_gby3.q.out @@ -41,8 +41,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -103,6 +107,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string) @@ -121,6 +140,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -135,6 +169,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: 
+ Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -145,9 +215,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e2 @@ -167,8 +259,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -229,6 +325,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: 
string), KEY._col1:0._col0 (type: string) @@ -247,6 +358,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Select Operator + expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string) + outputColumnNames: key, keyd, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -261,6 +387,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd, value + Column Types: string, double, string + Table: default.e2 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -271,9 +433,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 SELECT key, COUNT(distinct value) group by key @@ -1606,9 +1790,13 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on 
stages: Stage-0 + Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7 Stage-4 depends on stages: Stage-2 - Stage-1 depends on stages: Stage-4 - Stage-5 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-2 + Stage-1 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-2 @@ -1667,6 +1855,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -1681,11 +1884,47 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, keyd + Column Types: string, double + Table: default.e3 + Stage: Stage-4 Map Reduce Map Operator Tree: TableScan Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Map-reduce partition columns: _col0 (type: string) @@ -1709,6 +1948,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-1 Move 
Operator @@ -1720,9 +1974,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: explain FROM (select key, cast(key as double) as keyD, value from src order by key) a INSERT OVERWRITE TABLE e1 @@ -1746,10 +2022,16 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-3 Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-3 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -1797,6 +2079,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e3 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-4 Map Reduce @@ -1829,6 +2126,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: key, keyd + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(keyd, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + 
                     serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Group By Operator
         aggregations: sum(VALUE._col0)
         keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
@@ -1847,6 +2159,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+              outputColumnNames: key, keyd, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -1861,6 +2188,49 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2
+
+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e3
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1871,9 +2241,31 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2
 
-  Stage: Stage-6
+  Stage: Stage-7
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-2
     Move Operator
       tables:
@@ -1884,6 +2276,28 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-7
+  Stage: Stage-9
     Stats-Aggr Operator
 
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
diff --git a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out
index dd01b74..ed17fac 100644
--- a/ql/src/test/results/clientpositive/multi_insert_gby4.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_gby4.q.out
@@ -44,10 +44,16 @@ STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-11 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-12 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7, Stage-8, Stage-9
+  Stage-5 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3
   Stage-2 depends on stages: Stage-3
-  Stage-6 depends on stages: Stage-2
+  Stage-8 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-3
@@ -92,6 +98,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: (KEY._col0 > 500) (type: boolean)
           Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -113,6 +134,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: (KEY._col0 > 490) (type: boolean)
           Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -134,6 +170,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.e3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, count
+              Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -148,6 +199,49 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e1
+
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e2
+
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e3
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -158,9 +252,31 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e2
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-2
     Move Operator
       tables:
@@ -171,9 +287,31 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.e3
 
-  Stage: Stage-6
+  Stage: Stage-8
     Stats-Aggr Operator
 
+  Stage: Stage-9
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (SELECT key, value FROM src) a
 INSERT OVERWRITE TABLE e1 SELECT key, COUNT(*) WHERE key>450 GROUP BY key
diff --git a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out
index 9acae2e..67e4346 100644
--- a/ql/src/test/results/clientpositive/multi_insert_mixed.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_mixed.q.out
@@ -39,12 +39,16 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-0
+  Stage-11 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10
+  Stage-12 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10
+  Stage-13 depends on stages: Stage-5, Stage-8, Stage-9, Stage-10
   Stage-6 depends on stages: Stage-3
   Stage-7 depends on stages: Stage-6
   Stage-1 depends on stages: Stage-7
   Stage-8 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-3
   Stage-9 depends on stages: Stage-2
+  Stage-10 depends on stages: Stage-3
 
 STAGE PLANS:
   Stage: Stage-3
@@ -100,6 +104,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
          aggregations: count(VALUE._col0)
@@ -136,6 +155,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -150,6 +189,27 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi3
+
   Stage: Stage-6
     Map Reduce
       Map Operator Tree:
@@ -196,6 +256,26 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: struct), _col1 (type: struct)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-1
     Move Operator
@@ -223,6 +303,28 @@ STAGE PLANS:
   Stage: Stage-9
     Stats-Aggr Operator
 
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select key, count(1) group by key order by key
 insert overwrite table src_multi2 select value, count(1) group by value order by value
diff --git a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
index af0ef54..20c59c2 100644
--- a/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_move_tasks_share_dependencies.q.out
@@ -29,8 +29,11 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-3, Stage-5, Stage-6
+  Stage-8 depends on stages: Stage-3, Stage-5, Stage-6
   Stage-1 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -54,6 +57,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((key > 10) and (key < 20)) (type: boolean)
           Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -69,6 +85,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -86,6 +130,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -99,6 +157,28 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
 insert overwrite table src_multi2 select * where key > 10 and key < 20
@@ -166,19 +246,17 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-12 depends on stages: Stage-3, Stage-10, Stage-11
+  Stage-13 depends on stages: Stage-3, Stage-10, Stage-11
   Stage-1 depends on stages: Stage-4
   Stage-10 depends on stages: Stage-1
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
-  Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-11 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -202,6 +280,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((key > 10) and (key < 20)) (type: boolean)
           Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -217,6 +308,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -243,6 +362,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -286,44 +419,27 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
-    Conditional Operator
-
-  Stage: Stage-12
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-11
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src_multi2
-
-  Stage: Stage-13
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.src_multi2
-
-  Stage: Stage-14
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
@@ -390,11 +506,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-4 depends on stages: Stage-2
+  Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-12 depends on stages: Stage-3, Stage-5, Stage-11
+  Stage-13 depends on stages: Stage-3, Stage-5, Stage-11
   Stage-1 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-1
+  Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8
+  Stage-7
+  Stage-6
+  Stage-8
+  Stage-9 depends on stages: Stage-8
+  Stage-11 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -418,6 +542,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((key > 10) and (key < 20)) (type: boolean)
           Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -433,6 +570,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -450,6 +615,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -463,6 +642,67 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
+  Stage: Stage-10
+    Conditional Operator
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.src_multi2
+
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.src_multi2
+
+  Stage: Stage-9
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-11
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
 insert overwrite table src_multi2 select * where key > 10 and key < 20
@@ -533,6 +773,8 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-17 depends on stages: Stage-3, Stage-10, Stage-16
+  Stage-18 depends on stages: Stage-3, Stage-10, Stage-16
   Stage-1 depends on stages: Stage-4
   Stage-10 depends on stages: Stage-1
   Stage-5
@@ -543,6 +785,7 @@ STAGE DEPENDENCIES:
   Stage-11
   Stage-13
   Stage-14 depends on stages: Stage-13
+  Stage-16 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -566,6 +809,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((key > 10) and (key < 20)) (type: boolean)
          Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -581,6 +837,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -607,6 +891,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-17
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-18
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -689,6 +987,28 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-16
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10
 insert overwrite table src_multi2 select * where key > 10 and key < 20
@@ -757,8 +1077,12 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7
+  Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -794,6 +1118,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
           Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -810,6 +1149,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -827,6 +1181,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -837,9 +1205,53 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -900,19 +1312,23 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17
+  Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-11 depends on stages: Stage-1
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
+  Stage-10 depends on stages: Stage-2
+  Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14
   Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-12
+  Stage-14
+  Stage-15 depends on stages: Stage-14
+  Stage-17 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -948,6 +1364,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
          predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
           Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -964,6 +1395,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -990,6 +1436,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-18
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-19
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1000,7 +1460,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-10
+  Stage: Stage-11
     Stats-Aggr Operator
 
   Stage: Stage-5
@@ -1033,16 +1493,38 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-16
     Conditional Operator
 
-  Stage: Stage-12
+  Stage: Stage-13
     Move Operator
       files:
          hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-11
+  Stage: Stage-12
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -1054,7 +1536,7 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-13
+  Stage: Stage-14
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1066,12 +1548,34 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-14
+  Stage: Stage-15
     Move Operator
       files:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -1133,8 +1637,12 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7
+  Stage-9 depends on stages: Stage-3, Stage-6, Stage-5, Stage-7
   Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-2
+  Stage-7 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1170,6 +1678,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
           Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1186,6 +1709,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -1203,6 +1741,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1213,9 +1765,53 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -1276,19 +1872,23 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-13, Stage-12, Stage-15
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-18 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17
+  Stage-19 depends on stages: Stage-3, Stage-11, Stage-10, Stage-17
   Stage-1 depends on stages: Stage-4
-  Stage-10 depends on stages: Stage-1
+  Stage-11 depends on stages: Stage-1
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
+  Stage-10 depends on stages: Stage-2
+  Stage-16 depends on stages: Stage-2 , consists of Stage-13, Stage-12, Stage-14
   Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-12
+  Stage-14
+  Stage-15 depends on stages: Stage-14
+  Stage-17 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1324,6 +1924,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Filter Operator
           predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
           Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1340,6 +1955,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-9
     Conditional Operator
@@ -1366,6 +1996,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-18
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-19
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1376,7 +2020,7 @@ STAGE PLANS:
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: default.src_multi2
 
-  Stage: Stage-10
+  Stage: Stage-11
     Stats-Aggr Operator
 
   Stage: Stage-5
@@ -1409,16 +2053,38 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-15
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-16
     Conditional Operator
 
-  Stage: Stage-12
+  Stage: Stage-13
     Move Operator
       files:
          hdfs directory: true
 #### A masked pattern was here ####
 
-  Stage: Stage-11
+  Stage: Stage-12
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -1430,7 +2096,7 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-13
+  Stage: Stage-14
     Map Reduce
       Map Operator Tree:
           TableScan
@@ -1442,12 +2108,34 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
 
-  Stage: Stage-14
+  Stage: Stage-15
     Move Operator
       files:
           hdfs directory: true
 #### A masked pattern was here ####
 
+  Stage: Stage-17
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from src
 insert overwrite table src_multi1 select * where key < 10 group by key, value
 insert overwrite table src_multi2 select * where key > 10 and key < 20 group by key, value
@@ -1509,8 +2197,11 @@ STAGE DEPENDENCIES:
   Stage-4 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-3, Stage-5, Stage-6
+  Stage-9 depends on stages: Stage-3, Stage-5, Stage-6
   Stage-1 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1536,6 +2227,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
           Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1547,6 +2251,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1567,6 +2286,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
           Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1578,6 +2310,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-4
     Dependency Collection
@@ -1595,6 +2355,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1608,6 +2382,28 @@ STAGE PLANS:
   Stage: Stage-5
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from (select * from src union all select * from src) s
 insert overwrite table src_multi1 select * where key < 10
 insert overwrite table src_multi2 select * where key > 10 and key < 20
@@ -1694,19 +2490,17 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-9 depends on stages: Stage-2 , consists of Stage-6, Stage-5, Stage-7
   Stage-6
-  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14
+  Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-2
   Stage-0 depends on stages: Stage-4
   Stage-3 depends on stages: Stage-0
+  Stage-13 depends on stages: Stage-3, Stage-10, Stage-11
+  Stage-14 depends on stages: Stage-3, Stage-10, Stage-11
   Stage-1 depends on stages: Stage-4
   Stage-10 depends on stages: Stage-1
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-2 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-11
-  Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-11 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-2
@@ -1732,6 +2526,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
           Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1743,6 +2550,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
          TableScan
            alias: src
            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1763,6 +2585,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
         Filter Operator
           predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
           Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1774,6 +2609,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.src_multi2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size:
984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -1800,6 +2663,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -1843,44 +2720,27 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-11 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 @@ -1966,11 +2826,19 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-2 is a root stage - Stage-4 depends on stages: Stage-2 + Stage-4 depends on stages: Stage-2, Stage-7, Stage-6, Stage-9 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-13 depends on stages: Stage-3, Stage-5, Stage-11 + Stage-14 depends on stages: Stage-3, Stage-5, Stage-11 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-10 depends on stages: Stage-2 , consists of Stage-7, Stage-6, Stage-8 + Stage-7 + Stage-6 + Stage-8 + Stage-9 depends on stages: Stage-8 + Stage-11 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -1996,6 +2864,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 
(type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2007,6 +2888,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2027,6 +2923,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2038,6 +2947,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: 
compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-4 Dependency Collection @@ -2055,6 +2992,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2068,39 +3019,100 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator -PREHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -PREHOOK: type: QUERY -PREHOOK: Input: default@src -PREHOOK: Output: default@src_multi1 -PREHOOK: Output: default@src_multi2 -POSTHOOK: query: from (select * from src union all select * from src) s -insert overwrite table src_multi1 select * where key < 10 -insert overwrite table src_multi2 select * where key > 10 and key < 20 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src -POSTHOOK: Output: default@src_multi1 -POSTHOOK: Output: default@src_multi2 -POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] -POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: select * from src_multi1 -PREHOOK: type: QUERY -PREHOOK: Input: default@src_multi1 -#### A masked pattern was here #### -POSTHOOK: query: select * from src_multi1 -POSTHOOK: type: QUERY -POSTHOOK: Input: default@src_multi1 + Stage: Stage-10 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + +PREHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@src_multi1 +PREHOOK: Output: default@src_multi2 +POSTHOOK: query: from (select * from src union all select * from src) s +insert overwrite table src_multi1 select * where key < 10 +insert overwrite table src_multi2 select * where key > 10 and key < 20 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@src_multi1 +POSTHOOK: Output: default@src_multi2 +POSTHOOK: Lineage: src_multi1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: src_multi2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select * from src_multi1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +POSTHOOK: query: select * from src_multi1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src_multi1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 2 val_2 4 val_4 4 val_4 @@ -2157,6 +3169,8 @@ STAGE DEPENDENCIES: Stage-4 depends on stages: Stage-6, Stage-5, Stage-8, Stage-12, Stage-11, Stage-14 Stage-0 depends on stages: Stage-4 Stage-3 depends on stages: Stage-0 + Stage-18 depends on stages: Stage-3, Stage-10, Stage-16 + Stage-19 depends on stages: Stage-3, Stage-10, Stage-16 Stage-1 depends on stages: Stage-4 Stage-10 depends on stages: Stage-1 Stage-5 @@ -2167,6 +3181,7 @@ STAGE DEPENDENCIES: Stage-11 Stage-13 Stage-14 depends on stages: Stage-13 + Stage-16 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -2192,6 +3207,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: 
Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2203,6 +3231,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe TableScan alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -2223,6 +3266,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean) Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE @@ -2234,6 +3290,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -2260,6 +3344,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-18 + Column 
Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -2342,6 +3440,28 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-16 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select * from src union all select * from src) s insert overwrite table src_multi1 select * where key < 10 insert overwrite table src_multi2 select * where key > 10 and key < 20 @@ -3273,14 +4393,18 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-10 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-9 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-9 + Stage-10 depends on stages: Stage-9 + Stage-3 depends on stages: Stage-10 STAGE PLANS: Stage: Stage-4 @@ -3304,6 +4428,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3319,46 +4456,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3376,6 +4510,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3389,13 +4537,77 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-8 + Stage: Stage-10 Map Reduce Map Operator Tree: TableScan @@ -3497,14 +4709,23 @@ insert overwrite table src_multi2 select * where key > 10 and key < 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-4 is a root stage - Stage-6 depends on stages: Stage-4, Stage-8 + Stage-6 depends on stages: Stage-4, Stage-9, Stage-8, Stage-11, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-7, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-7 depends on stages: Stage-1 - Stage-2 depends on stages: Stage-4 - Stage-8 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-8 + Stage-12 depends on stages: Stage-4 , consists of Stage-9, Stage-8, Stage-10 + Stage-9 + Stage-8 + Stage-10 + Stage-11 depends on stages: Stage-10 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: 
Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3528,6 +4749,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3543,46 +4777,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output 
Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-6 Dependency Collection @@ -3600,6 +4831,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3613,22 +4858,125 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-2 + Stage: Stage-12 + Conditional Operator + + Stage: Stage-9 Move Operator files: - hdfs directory: false + hdfs directory: true #### A masked pattern was here #### Stage: Stage-8 Map Reduce Map Operator Tree: TableScan - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string) + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_multi2 + + Stage: Stage-11 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-13 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-14 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce 
partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Move Operator + files: + hdfs directory: false +#### A masked pattern was here #### + + Stage: Stage-15 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3723,22 +5071,21 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-4, Stage-14, Stage-15 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-16 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 + Stage-17 depends on stages: Stage-5, Stage-12, Stage-13, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 Stage-9 Stage-10 depends on stages: Stage-9 - Stage-17 depends on stages: Stage-4 , consists of Stage-14, Stage-13, Stage-15 - Stage-14 - Stage-13 - Stage-15 - Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 - Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-13 depends on stages: Stage-4 + Stage-14 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-14 + Stage-15 depends on stages: Stage-14 + Stage-3 depends on stages: Stage-15 STAGE PLANS: Stage: Stage-4 @@ -3762,6 +5109,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) 
+ outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -3777,46 +5137,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: 
Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -3843,6 +5200,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-17 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -3886,44 +5257,69 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-17 - Conditional Operator - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-15 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_multi2 - - Stage: Stage-16 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -3931,7 +5327,7 @@ STAGE PLANS: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-15 Map Reduce Map Operator Tree: TableScan @@ -4035,9 +5431,11 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-11 depends on stages: Stage-4 , consists of Stage-8, Stage-7, Stage-9 Stage-8 - Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-4, Stage-18 + Stage-6 depends on stages: Stage-8, Stage-7, Stage-10, Stage-14, Stage-13, Stage-16, Stage-19, Stage-20 Stage-0 depends on stages: Stage-6 Stage-5 depends on stages: Stage-0 + Stage-21 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 + Stage-22 depends on stages: Stage-5, Stage-12, Stage-18, Stage-2, Stage-3 Stage-1 depends on stages: Stage-6 Stage-12 depends on stages: Stage-1 Stage-7 @@ -4048,9 +5446,11 @@ STAGE DEPENDENCIES: Stage-13 Stage-15 Stage-16 depends on stages: Stage-15 - Stage-2 depends on stages: Stage-4 Stage-18 depends on stages: Stage-4 - Stage-3 depends on stages: Stage-18 + Stage-19 depends on stages: Stage-4 + Stage-2 depends on stages: Stage-19 + Stage-20 depends on stages: Stage-19 + Stage-3 depends on stages: Stage-20 STAGE PLANS: Stage: Stage-4 @@ -4074,6 +5474,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -4089,46 +5502,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File 
Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: string), value (type: string) - sort order: ++ - Map-reduce partition columns: key (type: string) - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reduce Operator Tree: - Forward - Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (KEY._col0 < 10) (type: boolean) - Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Filter Operator - predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) - Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: complete - outputColumnNames: _col0, _col1 - Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-11 Conditional Operator @@ -4155,6 +5565,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-21 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-22 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -4237,13 +5661,77 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### + Stage: Stage-18 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-19 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: key (type: string), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Forward + Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 10) (type: boolean) + Statistics: Num rows: 73 Data size: 775 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Filter Operator + predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean) + Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Stage: Stage-2 Move Operator files: hdfs directory: false #### A masked pattern was here #### - Stage: Stage-18 + Stage: Stage-20 Map Reduce Map Operator Tree: TableScan diff --git a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out index 1ff1db5..28dd199 100644 --- a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out +++ b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out @@ -52,6 +52,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-5 + Stage-8 depends on stages: Stage-3, Stage-5 Stage-4 depends on stages: Stage-2 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 @@ -131,6 +133,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + 
diff --git a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out
index 1ff1db5..28dd199 100644
--- a/ql/src/test/results/clientpositive/multi_insert_union_src.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_union_src.q.out
@@ -52,6 +52,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-3, Stage-5
+ Stage-8 depends on stages: Stage-3, Stage-5
 Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-4
 Stage-5 depends on stages: Stage-1
@@ -131,6 +133,26 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.src_multi1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -145,6 +167,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.src_multi1
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.src_multi2
+
 Stage: Stage-4
 Map Reduce
 Map Operator Tree:
@@ -167,6 +203,26 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.src_multi2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 58 Data size: 608 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-1
 Move Operator
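If the new Stage-7/Stage-8 Column Stats Work stages do what their names suggest, the multi-insert should leave both targets with column statistics that previously required an explicit ANALYZE. A hedged way to check after running the query (assumed verification steps, not taken from the golden file):

-- Sketch: inspect the column stats the extra stages should have written,
-- and the manual command they are meant to replace.
DESCRIBE FORMATTED src_multi1 key;
ANALYZE TABLE src_multi2 COMPUTE STATISTICS FOR COLUMNS;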
diff --git a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
index 5f69cc5..316c3b3 100644
--- a/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
+++ b/ql/src/test/results/clientpositive/multi_insert_with_join2.q.out
@@ -67,6 +67,8 @@ STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
 Stage-2 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
 STAGE PLANS:
 Stage: Stage-1
@@ -118,6 +120,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -132,6 +149,35 @@ STAGE PLANS:
 Stage: Stage-2
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Warning: Shuffle Join JOIN[8][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product
 PREHOOK: query: explain
 FROM T_A a LEFT JOIN T_B b ON a.id = b.id
@@ -149,6 +195,8 @@ STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
 Stage-2 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
 STAGE PLANS:
 Stage: Stage-1
@@ -200,6 +248,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 25 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -214,6 +277,35 @@ STAGE PLANS:
 Stage: Stage-2
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a LEFT JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -236,8 +328,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-1 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-0 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -285,6 +381,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col5 = 'Id_1') and (_col6 = 'val_103')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -300,6 +411,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-1
 Move Operator
@@ -314,6 +440,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-0
 Move Operator
 tables:
@@ -324,9 +486,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a LEFT JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -349,8 +533,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -398,6 +586,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -413,6 +616,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -427,6 +645,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -437,9 +691,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -462,8 +738,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -517,6 +797,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -532,6 +827,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -546,6 +856,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -556,9 +902,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -581,8 +949,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -636,6 +1008,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_1' (type: string), 'val_103' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col6 = 'val_104') and (_col5 = 'Id_2')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -651,6 +1038,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), 'Id_2' (type: string), 'val_104' (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -665,6 +1067,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -675,9 +1113,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -700,8 +1160,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -767,6 +1231,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -782,6 +1261,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -796,6 +1290,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -806,9 +1336,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: explain
 FROM T_A a JOIN T_B b ON a.id = b.id
 INSERT OVERWRITE TABLE join_result_1
@@ -831,8 +1383,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -898,6 +1454,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: ((_col2 = 'val_104') and (_col3 = 'Id_2')) (type: boolean)
 Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
@@ -913,6 +1484,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ outputColumnNames: ida, vala, idb, valb
+ Statistics: Num rows: 1 Data size: 13 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(ida, 16), compute_stats(vala, 16), compute_stats(idb, 16), compute_stats(valb, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -927,6 +1513,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: ida, vala, idb, valb
+ Column Types: string, string, string, string
+ Table: default.join_result_3
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -937,6 +1559,28 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.join_result_3
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
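All of the join variants above renumber the same way: each destination keeps its Stats-Aggr Operator stage and additionally gets one hash-mode compute_stats Group By on the map side, one mergepartial reducer, and one Column Stats Work stage per target table, which is why the stage count grows by four rather than one. A hedged sketch of the query shape behind these plans (table names as in the test; the SELECT lists and column names are assumptions):

-- Illustrative reconstruction, not copied from the .q file.
EXPLAIN
FROM T_A a JOIN T_B b ON a.id = b.id
INSERT OVERWRITE TABLE join_result_1 SELECT a.id, a.val, b.id, b.val WHERE b.id = 'Id_1' AND b.val = 'val_103'
INSERT OVERWRITE TABLE join_result_3 SELECT a.id, a.val, b.id, b.val WHERE b.id = 'Id_2' AND b.val = 'val_104';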
diff --git a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
index 7af8c43..16e3814 100644
--- a/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
+++ b/ql/src/test/results/clientpositive/multigroupby_singlemr.q.out
@@ -52,9 +52,13 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
 Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-5
 STAGE PLANS:
 Stage: Stage-2
@@ -114,6 +118,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: d1, d2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -128,11 +147,47 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3
+ Column Types: int, int, int
+ Table: default.dest2
+
 Stage: Stage-4
 Map Reduce
 Map Operator Tree:
 TableScan
 Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
 key expressions: _col0 (type: int), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -157,6 +212,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: d1, d2, d3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-1
 Move Operator
@@ -168,9 +238,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
- Stage: Stage-5
+ Stage: Stage-6
 Stats-Aggr Operator
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: EXPLAIN
 FROM TBL
 INSERT OVERWRITE TABLE DEST1 SELECT TBL.C1, COUNT(TBL.C2) GROUP BY TBL.C1
@@ -185,9 +277,13 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
 Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-5
 STAGE PLANS:
 Stage: Stage-2
@@ -247,6 +343,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: d1, d2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -261,11 +372,47 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3
+ Column Types: int, int, int
+ Table: default.dest2
+
 Stage: Stage-4
 Map Reduce
 Map Operator Tree:
 TableScan
 Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
 key expressions: _col0 (type: int), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -290,6 +437,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: d1, d2, d3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-1
 Move Operator
@@ -301,9 +463,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
- Stage: Stage-5
+ Stage: Stage-6
 Stats-Aggr Operator
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: EXPLAIN
 FROM TBL
 INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
@@ -318,9 +502,13 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
+ Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
 Stage-4 depends on stages: Stage-2
- Stage-1 depends on stages: Stage-4
- Stage-5 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
+ Stage-1 depends on stages: Stage-5
+ Stage-6 depends on stages: Stage-1
+ Stage-7 depends on stages: Stage-5
 STAGE PLANS:
 Stage: Stage-2
@@ -380,6 +568,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: d1, d2, d3, d4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -394,11 +597,47 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3, d4
+ Column Types: int, int, int, int
+ Table: default.dest3
+
+ Stage: Stage-9
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3
+ Column Types: int, int, int
+ Table: default.dest2
+
 Stage: Stage-4
 Map Reduce
 Map Operator Tree:
 TableScan
 Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
 key expressions: _col0 (type: int), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -423,6 +662,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: d1, d2, d3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-1
 Move Operator
@@ -434,9 +688,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
- Stage: Stage-5
+ Stage: Stage-6
 Stats-Aggr Operator
+ Stage: Stage-7
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: EXPLAIN
 FROM TBL
 INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
@@ -451,8 +727,12 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5, Stage-6
+ Stage-4 depends on stages: Stage-2
 Stage-1 depends on stages: Stage-2
- Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-1
+ Stage-6 depends on stages: Stage-2
 STAGE PLANS:
 Stage: Stage-2
@@ -492,6 +772,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: d1, d2, d3, d4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Group By Operator
 aggregations: count(VALUE._col0)
 keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int)
@@ -510,6 +805,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest4
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: d1, d2, d3, d4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -524,6 +834,42 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3, d4
+ Column Types: int, int, int, int
+ Table: default.dest3
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3, d4
+ Column Types: int, int, int, int
+ Table: default.dest4
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -534,9 +880,31 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest4
- Stage: Stage-4
+ Stage: Stage-5
 Stats-Aggr Operator
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: EXPLAIN
 FROM TBL
 INSERT OVERWRITE TABLE DEST3 SELECT TBL.C1, TBL.C2, TBL.C3, COUNT(TBL.C4) GROUP BY TBL.C1, TBL.C2, TBL.C3
@@ -553,12 +921,18 @@ STAGE DEPENDENCIES:
 Stage-3 is a root stage
 Stage-0 depends on stages: Stage-3
 Stage-4 depends on stages: Stage-0
+ Stage-12 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11
+ Stage-13 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11
+ Stage-14 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8, Stage-10, Stage-11
 Stage-5 depends on stages: Stage-3
- Stage-1 depends on stages: Stage-5
- Stage-6 depends on stages: Stage-1
- Stage-7 depends on stages: Stage-3
- Stage-2 depends on stages: Stage-7
- Stage-8 depends on stages: Stage-2
+ Stage-6 depends on stages: Stage-3
+ Stage-1 depends on stages: Stage-6
+ Stage-7 depends on stages: Stage-1
+ Stage-8 depends on stages: Stage-6
+ Stage-9 depends on stages: Stage-3
+ Stage-2 depends on stages: Stage-9
+ Stage-10 depends on stages: Stage-2
+ Stage-11 depends on stages: Stage-9
 STAGE PLANS:
 Stage: Stage-3
@@ -634,6 +1008,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int)
+ outputColumnNames: d1, d2, d3, d4
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-0
 Move Operator
@@ -648,11 +1037,54 @@ STAGE PLANS:
 Stage: Stage-4
 Stats-Aggr Operator
+ Stage: Stage-12
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3, d4
+ Column Types: int, int, int, int
+ Table: default.dest3
+
+ Stage: Stage-13
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2, d3
+ Column Types: int, int, int
+ Table: default.dest2
+
+ Stage: Stage-14
+ Column Stats Work
+ Column Stats Desc:
+ Columns: d1, d2
+ Column Types: int, int
+ Table: default.dest1
+
 Stage: Stage-5
 Map Reduce
 Map Operator Tree:
 TableScan
 Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-6
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
 key expressions: _col0 (type: int), _col1 (type: int)
 sort order: ++
 Map-reduce partition columns: _col0 (type: int), _col1 (type: int)
@@ -677,6 +1109,21 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int)
+ outputColumnNames: d1, d2, d3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Stage: Stage-1
 Move Operator
@@ -688,10
+1135,32 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator - Stage: Stage-7 + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-9 Map Reduce Map Operator Tree: TableScan @@ -720,6 +1189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-2 Move Operator @@ -731,6 +1215,28 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Stage: Stage-8 + Stage: Stage-10 Stats-Aggr Operator + Stage: Stage-11 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out index d01461b..4265eb9 100644 --- a/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out +++ b/ql/src/test/results/clientpositive/nonreserved_keywords_insert_into1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on 
stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -36,7 +38,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -58,6 +59,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -72,6 +88,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -103,6 +148,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -121,7 +168,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -143,6 +189,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +218,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT INTO TABLE `insert` SELECT * FROM src LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -197,6 +287,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,7 +307,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reduce Operator Tree: Select Operator @@ -237,6 +328,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, as + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(as, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -251,6 +357,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, as + Column Types: int, string + Table: default.insert + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 
Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE `insert` SELECT * FROM src LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/notable_alias1.q.out b/ql/src/test/results/clientpositive/notable_alias1.q.out index 200b31c..3e44959 100644 --- a/ql/src/test/results/clientpositive/notable_alias1.q.out +++ b/ql/src/test/results/clientpositive/notable_alias1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', key, count(1) WHERE src.key < 100 group by key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/notable_alias2.q.out b/ql/src/test/results/clientpositive/notable_alias2.q.out index 4df8073..49a2b95 100644 --- a/ql/src/test/results/clientpositive/notable_alias2.q.out +++ b/ql/src/test/results/clientpositive/notable_alias2.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 
depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -64,6 +66,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: dummy, key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(dummy, 16), compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -78,6 +95,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: dummy, key, value + Column Types: string, int, double + Table: default.dest1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT '1234', src.key, count(1) WHERE key < 100 group by src.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/orc_merge1.q.out b/ql/src/test/results/clientpositive/orc_merge1.q.out index a83e85b..939bf2a 100644 --- a/ql/src/test/results/clientpositive/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge1.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 
(type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index 607aaeb..468b03d 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + 
expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 
(type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git 
a/ql/src/test/results/clientpositive/orc_merge2.q.out b/ql/src/test/results/clientpositive/orc_merge2.q.out index d4c474f..29b3373 100644 --- a/ql/src/test/results/clientpositive/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -53,6 +54,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -80,6 +115,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge3.q.out b/ql/src/test/results/clientpositive/orc_merge3.q.out index 7bf12c6..416cb3c 100644 --- a/ql/src/test/results/clientpositive/orc_merge3.q.out +++ b/ql/src/test/results/clientpositive/orc_merge3.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -83,6 +84,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: 
default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -106,6 +133,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge4.q.out b/ql/src/test/results/clientpositive/orc_merge4.q.out index 828f204..7c85443 100644 --- a/ql/src/test/results/clientpositive/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/orc_merge4.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -101,6 +102,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -124,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + 
Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge5.q.out b/ql/src/test/results/clientpositive/orc_merge5.q.out index e845186..0d965df 100644 --- a/ql/src/test/results/clientpositive/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/orc_merge5.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -68,6 +95,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -112,6 +146,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -139,6 +174,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), 
compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -162,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge6.q.out b/ql/src/test/results/clientpositive/orc_merge6.q.out index 5ece361..5de3c47 100644 --- a/ql/src/test/results/clientpositive/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/orc_merge6.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -54,6 +55,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 
(type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -157,6 +199,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -184,6 +227,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) 
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -210,6 +287,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index a83e85b..939bf2a 100644 --- a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -54,6 +54,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -91,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: 
INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -126,6 +168,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -149,6 +192,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -175,6 +252,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -240,6 +324,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -263,6 +348,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -289,6 +408,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index fcf1c68..762acd0 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -53,6 +54,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: 
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index a27041f..6388dfc 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -50,6 +51,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -66,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + 
Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 diff --git a/ql/src/test/results/clientpositive/parallel.q.out b/ql/src/test/results/clientpositive/parallel.q.out index 459105e..c851131 100644 --- a/ql/src/test/results/clientpositive/parallel.q.out +++ b/ql/src/test/results/clientpositive/parallel.q.out @@ -29,8 +29,12 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7 + Stage-5 depends on stages: Stage-3 Stage-1 depends on stages: Stage-3 - Stage-5 depends on stages: Stage-1 + Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-2 @@ -91,6 +95,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_a + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete @@ -104,6 +123,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -118,6 +152,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_a + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_b + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -128,9 +198,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_b - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from (select key, value from src group by key, value) s insert overwrite table src_a select s.key, s.value group by s.key, s.value insert overwrite table src_b select s.key, s.value group by s.key, s.value diff --git a/ql/src/test/results/clientpositive/parallel_join1.q.out b/ql/src/test/results/clientpositive/parallel_join1.q.out index 8843661..43480e1 100644 --- a/ql/src/test/results/clientpositive/parallel_join1.q.out +++ b/ql/src/test/results/clientpositive/parallel_join1.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +106,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num 
rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/parquet_analyze.q.out b/ql/src/test/results/clientpositive/parquet_analyze.q.out index d3cdc3f..8627b18 100644 --- a/ql/src/test/results/clientpositive/parquet_analyze.q.out +++ b/ql/src/test/results/clientpositive/parquet_analyze.q.out @@ -90,7 +90,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 700 @@ -138,7 +138,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"address\":\"true\",\"first_name\":\"true\",\"id\":\"true\",\"last_name\":\"true\",\"salary\":\"true\",\"start_date\":\"true\",\"state\":\"true\"}} numFiles 1 numRows 100 rawDataSize 5952 diff --git a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out index b9a3664..6fd3357 100644 --- a/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out +++ b/ql/src/test/results/clientpositive/parquet_int96_timestamp.q.out @@ -56,7 +56,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone UTC @@ -125,7 +125,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST @@ -194,7 +194,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 rawDataSize 1 @@ -262,7 +262,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone CST @@ -331,7 +331,7 @@ Retention: 0 #### A masked pattern 
was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} numFiles 1 numRows 1 parquet.mr.int96.write.zone PST diff --git a/ql/src/test/results/clientpositive/pcr.q.out b/ql/src/test/results/clientpositive/pcr.q.out index a1301fd..14fb140 100644 --- a/ql/src/test/results/clientpositive/pcr.q.out +++ b/ql/src/test/results/clientpositive/pcr.q.out @@ -91,7 +91,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -138,7 +138,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -280,7 +280,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -327,7 +327,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -374,7 +374,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -554,7 +554,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -601,7 +601,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -745,7 +745,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -792,7 +792,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -938,7 +938,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -985,7 +985,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: ds 
2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1189,7 +1189,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1236,7 +1236,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1283,7 +1283,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1447,7 +1447,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1494,7 +1494,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1621,7 +1621,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1835,7 +1835,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1882,7 +1882,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1929,7 +1929,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2123,7 +2123,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2170,7 +2170,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2326,7 +2326,7 @@ 
STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2585,7 +2585,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2632,7 +2632,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2880,7 +2880,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2927,7 +2927,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2974,7 +2974,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3021,7 +3021,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3200,7 +3200,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3247,7 +3247,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3294,7 +3294,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3459,16 +3459,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -3514,6 +3512,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) 
+ outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3550,6 +3564,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3561,7 +3602,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3602,6 +3643,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -3644,6 +3714,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true 
+ + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -3824,15 +3910,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -3870,235 +3947,122 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + 
Truncated Path -> Alias: #### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false - Stage: Stage-12 +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +PREHOOK: Output: default@pcr_t3 +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Output: default@pcr_t3 +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 + Stage-5 + Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 + Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-4 + Stage-6 + Stage-7 depends on stages: Stage-6 + Stage-1 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-1 + Stage-10 
depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-2 Map Reduce Map Operator Tree: TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 0 - numRows 0 - rawDataSize 0 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: -#### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - -PREHOOK: query: from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' -PREHOOK: type: QUERY -PREHOOK: Input: default@pcr_t1 -PREHOOK: Input: default@pcr_t1@ds=2000-04-08 -PREHOOK: Output: default@pcr_t2 -PREHOOK: Output: default@pcr_t3 -POSTHOOK: query: from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' -POSTHOOK: type: QUERY -POSTHOOK: Input: default@pcr_t1 -POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 -POSTHOOK: Output: default@pcr_t2 -POSTHOOK: Output: default@pcr_t3 -POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: pcr_t2.value SIMPLE 
[(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] -POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] -PREHOOK: query: explain extended -from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 -PREHOOK: type: QUERY -POSTHOOK: query: explain extended -from pcr_t1 -insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 -insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 -POSTHOOK: type: QUERY -STAGE DEPENDENCIES: - Stage-2 is a root stage - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 - Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 - Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 - -STAGE PLANS: - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - alias: pcr_t1 - Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + alias: pcr_t1 + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE GatherStats: false Filter Operator isSamplingPred: false @@ -4119,7 +4083,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4140,6 +4104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 2 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: (key = 3) (type: boolean) @@ -4159,7 +4139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4180,6 +4160,33 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: 3 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -4191,7 +4198,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4232,6 +4239,35 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 [pcr_t1] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-8 Conditional Operator @@ -4251,7 +4287,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4274,6 +4310,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t2 + Is Table Level Stats: true + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.pcr_t3 + Is Table Level Stats: true + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -4288,7 +4340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4318,7 +4370,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4339,7 +4391,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4375,7 +4427,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4405,7 +4457,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4426,7 +4478,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4454,15 +4506,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -4472,7 +4515,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -4500,180 +4543,69 @@ STAGE PLANS: Map Operator Tree: TableScan GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value 
expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: #### A masked pattern was here #### Partition - base file name: -ext-10005 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + base file name: -mr-10005 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 + columns _col0,_col1 + columns.types struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Truncated Path -> Alias: #### A masked pattern was here #### - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - GatherStats: false - File Output Operator - compressed: false - GlobalTableId: 0 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: -ext-10005 - input 
format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,value - columns.comments - columns.types int:string -#### A masked pattern was here #### - name default.pcr_t3 - numFiles 1 - numRows 20 - rawDataSize 160 - serialization.ddl struct pcr_t3 { i32 key, string value} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 180 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.pcr_t3 - name: default.pcr_t3 - Truncated Path -> Alias: + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false PREHOOK: query: from pcr_t1 insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 diff --git a/ql/src/test/results/clientpositive/pointlookup2.q.out b/ql/src/test/results/clientpositive/pointlookup2.q.out index 3438c74..114e77f 100644 --- a/ql/src/test/results/clientpositive/pointlookup2.q.out +++ b/ql/src/test/results/clientpositive/pointlookup2.q.out @@ -136,7 +136,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -183,7 +183,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -334,7 +334,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -553,7 +553,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -600,7 +600,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -813,7 +813,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -860,7 +860,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -905,7 +905,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -926,7 +926,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1128,7 +1128,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1175,7 +1175,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1222,7 +1222,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1267,7 +1267,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value @@ -1288,7 +1288,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"ds":"true","key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns ds,key,value diff --git a/ql/src/test/results/clientpositive/pointlookup3.q.out b/ql/src/test/results/clientpositive/pointlookup3.q.out index 2c3e39f..3ad7d38 100644 --- a/ql/src/test/results/clientpositive/pointlookup3.q.out +++ b/ql/src/test/results/clientpositive/pointlookup3.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -418,7 +418,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -638,7 +638,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -686,7 +686,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -900,7 +900,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -948,7 +948,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -996,7 +996,7 @@ STAGE PLANS: ds1 2000-04-10 ds2 2001-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/pointlookup4.q.out b/ql/src/test/results/clientpositive/pointlookup4.q.out index 8ef5551..3c9cc60 100644 --- a/ql/src/test/results/clientpositive/pointlookup4.q.out +++ b/ql/src/test/results/clientpositive/pointlookup4.q.out @@ -91,7 +91,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -139,7 +139,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -267,7 +267,7 @@ STAGE PLANS: ds1 2000-04-08 ds2 2001-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -315,7 +315,7 @@ STAGE PLANS: ds1 2000-04-09 ds2 2001-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out index cbe7654..3d96f3c 100644 --- a/ql/src/test/results/clientpositive/ppd_constant_expr.q.out +++ b/ql/src/test/results/clientpositive/ppd_constant_expr.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -43,6 +44,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -66,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -156,6 +190,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -179,6 +214,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.ppd_constant_expr + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: double) + outputColumnNames: c1, c2, c3 + Statistics: Num rows: 25 Data size: 191 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -202,6 +263,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3 + Column Types: string, int, double + Table: default.ppd_constant_expr + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/ppd_join5.q.out b/ql/src/test/results/clientpositive/ppd_join5.q.out index 551769c..1cd3a7a 100644 --- a/ql/src/test/results/clientpositive/ppd_join5.q.out +++ b/ql/src/test/results/clientpositive/ppd_join5.q.out @@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE [] POSTHOOK: Lineage: t1.id2 SIMPLE [] POSTHOOK: Lineage: t2.d SIMPLE [] POSTHOOK: Lineage: t2.id SIMPLE [] -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) @@ -63,35 +63,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: 
Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -104,34 +101,38 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -148,7 +149,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: explain select * from ( select a.*,b.d d1,c.d d2 from @@ -183,35 +184,32 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: string) TableScan - alias: b + alias: c Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((d <= 1) and id is not null) (type: boolean) + 
predicate: (d <= 1) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: id (type: string), d (type: int) - outputColumnNames: _col0, _col1 + expressions: d (type: int) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string), _col0 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col0 (type: string) + sort order: Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int) + value expressions: _col0 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: string), _col1 (type: string) - 1 _col0 (type: string), _col0 (type: string) - outputColumnNames: _col0, _col1, _col3 - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -224,37 +222,41 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int) + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int) TableScan - alias: c + alias: b Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (d <= 1) (type: boolean) + predicate: ((d <= 1) and id is not null) (type: boolean) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: d (type: int) - outputColumnNames: _col0 + expressions: id (type: string), d (type: int) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - sort order: + key expressions: _col0 (type: string), _col0 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col0 (type: string) Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int) + value expressions: _col1 (type: int) Reduce Operator Tree: Join Operator condition map: Inner Join 0 to 1 keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col3, _col4 + 0 _col0 (type: string), _col1 (type: string) + 1 _col0 (type: string), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col4 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean) + predicate: ((_col4 > 1) or (_col2 > 1)) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int) + expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -271,7 +273,7 @@ STAGE PLANS: Processor Tree: ListSink 
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: select * from ( select a.*,b.d d1,c.d d2 from t1 a join t2 b on (a.id1 = b.id) diff --git a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out index 7e501c7..baf1f45 100644 --- a/ql/src/test/results/clientpositive/ppd_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/ppd_multi_insert.q.out @@ -48,10 +48,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -105,6 +111,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -120,6 +141,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +171,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -163,6 +215,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -173,9 +268,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -189,9 +306,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: 
string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: @@ -1313,10 +1459,16 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-13 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-14 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8, Stage-9, Stage-10, Stage-3 + Stage-6 depends on stages: Stage-4 Stage-1 depends on stages: Stage-4 - Stage-6 depends on stages: Stage-1 + Stage-7 depends on stages: Stage-1 + Stage-8 depends on stages: Stage-4 Stage-2 depends on stages: Stage-4 - Stage-7 depends on stages: Stage-2 + Stage-9 depends on stages: Stage-2 + Stage-10 depends on stages: Stage-4 Stage-3 depends on stages: Stage-4 STAGE PLANS: @@ -1370,6 +1522,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1385,6 +1552,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false 
+ table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean) Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE @@ -1400,6 +1582,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (_col0 >= 300) (type: boolean) Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE @@ -1428,6 +1626,49 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi1 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.mi2 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.mi3 + + Stage: Stage-6 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -1438,9 +1679,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi2 - Stage: Stage-6 + Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Move Operator tables: @@ -1454,9 +1717,38 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.mi3 - Stage: Stage-7 + Stage: Stage-9 Stats-Aggr Operator + Stage: Stage-10 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-3 Move Operator files: diff --git a/ql/src/test/results/clientpositive/push_or.q.out b/ql/src/test/results/clientpositive/push_or.q.out index dacdc40..913fc67 100644 --- a/ql/src/test/results/clientpositive/push_or.q.out +++ b/ql/src/test/results/clientpositive/push_or.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -120,7 +120,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/quote1.q.out b/ql/src/test/results/clientpositive/quote1.q.out index f8592c4..f4fdc3f 100644 --- a/ql/src/test/results/clientpositive/quote1.q.out +++ b/ql/src/test/results/clientpositive/quote1.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -46,6 +47,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string) + outputColumnNames: location, type, table + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(location, 16), compute_stats(type, 16) + keys: table (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -71,6 +106,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: location, type + Column Types: int, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out index fa4aae8..2a859d0 100644 --- a/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out +++ b/ql/src/test/results/clientpositive/rand_partitionpruner2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -72,6 +73,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -174,6 +191,35 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [a] /srcpart/ds=2008-04-08/hr=12 [a] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + 
Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -216,6 +262,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: string, string, string, string + Table: default.tmptable + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/rcfile_default_format.q.out b/ql/src/test/results/clientpositive/rcfile_default_format.q.out index bb846c0..afa0797 100644 --- a/ql/src/test/results/clientpositive/rcfile_default_format.q.out +++ b/ql/src/test/results/clientpositive/rcfile_default_format.q.out @@ -121,7 +121,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} numFiles 1 numRows 500 rawDataSize 1406 diff --git a/ql/src/test/results/clientpositive/rcfile_null_value.q.out b/ql/src/test/results/clientpositive/rcfile_null_value.q.out index a66655b..228c802 100644 --- a/ql/src/test/results/clientpositive/rcfile_null_value.q.out +++ b/ql/src/test/results/clientpositive/rcfile_null_value.q.out @@ -91,6 +91,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -149,6 +151,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.dest1_rc + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -163,6 +180,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1_rc + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/sample1.q.out b/ql/src/test/results/clientpositive/sample1.q.out index 1947e92..d2d85b8 100644 --- a/ql/src/test/results/clientpositive/sample1.q.out +++ b/ql/src/test/results/clientpositive/sample1.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string) + outputColumnNames: key, value, dt, hr + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional 
Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value, dt, hr + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample2.q.out b/ql/src/test/results/clientpositive/sample2.q.out index 9c5e238..ada73a5 100644 --- a/ql/src/test/results/clientpositive/sample2.q.out +++ b/ql/src/test/results/clientpositive/sample2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample4.q.out b/ql/src/test/results/clientpositive/sample4.q.out index 20bd86c..61bb4d6 100644 --- a/ql/src/test/results/clientpositive/sample4.q.out +++ b/ql/src/test/results/clientpositive/sample4.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 
depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample5.q.out b/ql/src/test/results/clientpositive/sample5.q.out index 0ed1fb2..178b1bb 100644 --- a/ql/src/test/results/clientpositive/sample5.q.out +++ b/ql/src/test/results/clientpositive/sample5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A 
masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample6.q.out b/ql/src/test/results/clientpositive/sample6.q.out index 1988a90..e7593de 100644 --- a/ql/src/test/results/clientpositive/sample6.q.out +++ b/ql/src/test/results/clientpositive/sample6.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -73,6 +74,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -126,6 +143,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -168,6 +214,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sample7.q.out b/ql/src/test/results/clientpositive/sample7.q.out index 7de68a8..bed69e0 100644 --- a/ql/src/test/results/clientpositive/sample7.q.out +++ b/ql/src/test/results/clientpositive/sample7.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -74,6 +75,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -127,6 +144,35 @@ STAGE PLANS: name: default.srcbucket Truncated Path -> Alias: /srcbucket/000000_0 [s] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -169,6 +215,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator 
Tree: diff --git a/ql/src/test/results/clientpositive/skewjoin.q.out b/ql/src/test/results/clientpositive/skewjoin.q.out index 477abb9..5eed010 100644 --- a/ql/src/test/results/clientpositive/skewjoin.q.out +++ b/ql/src/test/results/clientpositive/skewjoin.q.out @@ -80,11 +80,13 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-0 - Stage-6 - Stage-4 depends on stages: Stage-6 - Stage-0 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 , consists of Stage-7, Stage-0, Stage-3 + Stage-7 + Stage-5 depends on stages: Stage-7 + Stage-0 depends on stages: Stage-5 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -143,11 +145,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-5 + Stage: Stage-6 Conditional Operator - Stage: Stage-6 + Stage: Stage-7 Map Reduce Local Work Alias -> Map Local Tables: 1 @@ -161,7 +178,7 @@ STAGE PLANS: 0 reducesinkkey0 (type: string) 1 reducesinkkey0 (type: string) - Stage: Stage-4 + Stage: Stage-5 Map Reduce Map Operator Tree: TableScan @@ -184,6 +201,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -200,6 +232,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out index b53e670..bda3511 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_11.q.out @@ -50,6 +50,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -93,7 +95,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -172,6 +174,32 @@ STAGE PLANS: TotalFiles: 16 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -205,6 +233,86 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + GatherStats: false + Reduce Output Operator + key expressions: '1' (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: '1' (type: string) + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: -mr-10002 + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + column.name.delimiter , + columns _col0,_col1,_col2 + columns.types string,struct,struct + escape.delim \ + serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Truncated Path -> Alias: +#### A masked pattern was here #### + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -1827,7 +1935,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -1936,7 +2044,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out index 9928a60..9072803 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_12.q.out @@ -62,6 +62,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -129,7 +130,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 16 bucket_field_name key column.name.delimiter , @@ -207,6 +208,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1' PREHOOK: type: QUERY PREHOOK: Input: default@test_table1 @@ -254,6 +263,7 @@ 
STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -399,6 +409,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + Is Table Level Stats: false + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2') SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out index 49ff635..3bc00c8 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_13.q.out @@ -112,7 +112,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -135,7 +135,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -292,7 +292,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -315,7 +315,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out index 6c41171..76fde24 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_20.q.out @@ -42,6 +42,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -73,6 +75,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +107,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -157,6 +211,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -193,6 +248,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -273,6 +335,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -304,6 +368,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 
500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -320,3 +400,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out index c0fdfd3..0ceb6a5 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_21.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -78,6 +79,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -108,6 +116,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -139,6 +149,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output 
Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -155,6 +181,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -185,6 +247,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -215,6 +279,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -231,6 +311,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + 
Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -261,6 +377,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -292,6 +410,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -308,6 +442,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop 
table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -338,6 +508,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -369,6 +541,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -385,6 +573,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -415,6 +639,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -445,6 +671,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash 
+ outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -461,3 +703,39 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out index 36e8792..8bd0fd6 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_22.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -74,6 +75,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY @@ -180,6 +188,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -214,6 +223,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 SELECT * FROM test_table1 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 82f5804..20d51db 100644 --- a/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ b/ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -620,6 +620,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-9 depends on stages: 
Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -648,6 +649,27 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.smb_join_results + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: k1, v1, k2, v2 + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Reduce Output Operator + sort order: + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -671,6 +693,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: k1, v1, k2, v2 + Column Types: int, string, int, string + Table: default.smb_join_results + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out index 40c0ec5..5754a74 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_5.q.out @@ -101,7 +101,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 1 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out index 0740df3..5dc2426 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_6.q.out @@ -75,7 +75,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -184,7 +184,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out index fc5066c..28501ab 100644 --- a/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out +++ b/ql/src/test/results/clientpositive/sort_merge_join_desc_7.q.out @@ -111,7 +111,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key 
column.name.delimiter , @@ -158,7 +158,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -259,7 +259,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -308,7 +308,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out index 0d5ba01..f0cf74c 100644 --- a/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out +++ b/ql/src/test/results/clientpositive/spark/alter_merge_stats_orc.q.out @@ -89,7 +89,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -241,7 +241,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 @@ -290,7 +290,7 @@ Database: default Table: src_orc_merge_test_part_stat #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 1500 rawDataSize 141000 diff --git a/ql/src/test/results/clientpositive/spark/auto_join1.q.out b/ql/src/test/results/clientpositive/spark/auto_join1.q.out index d9cd770..74ec63d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join1.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join1.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col2 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col2 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: 
NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join14.q.out b/ql/src/test/results/clientpositive/spark/auto_join14.q.out index 82deefe..fa5c433 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join14.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr 
Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join17.q.out b/ql/src/test/results/clientpositive/spark/auto_join17.q.out index 6d63fa6..f818416 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join17.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join17.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git 
a/ql/src/test/results/clientpositive/spark/auto_join19.q.out b/ql/src/test/results/clientpositive/spark/auto_join19.q.out index 88ef3f1..fd4f104 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join19.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join19.q.out @@ -21,13 +21,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -70,7 +73,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') diff --git a/ql/src/test/results/clientpositive/spark/auto_join2.q.out b/ql/src/test/results/clientpositive/spark/auto_join2.q.out index e32abba..5a98744 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join2.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join2.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -43,7 +44,7 @@ STAGE PLANS: 1 _col0 (type: 
string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -64,6 +65,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,7 +89,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -96,7 +99,7 @@ STAGE PLANS: 1 UDFToDouble(_col0) (type: double) outputColumnNames: _col0, _col3 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -110,8 +113,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -126,6 +156,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j2 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key) INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join26.q.out b/ql/src/test/results/clientpositive/spark/auto_join26.q.out index a0deaab..299dbcc 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join26.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join26.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + 
outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join3.q.out b/ql/src/test/results/clientpositive/spark/auto_join3.q.out index a17cc1a..31d1017 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join3.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join3.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -44,7 +45,7 @@ STAGE PLANS: 2 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: src3 @@ -66,6 +67,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,8 +93,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col3 (type: string) @@ -105,8 +108,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -121,6 +151,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join4.q.out b/ql/src/test/results/clientpositive/spark/auto_join4.q.out index 9c9ed36..3fb5482 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join4.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join4.q.out @@ -41,13 +41,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +93,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int), _col3 (type: string) @@ -104,8 +107,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +150,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join5.q.out b/ql/src/test/results/clientpositive/spark/auto_join5.q.out index 5f0b044..1e1f614 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join5.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join5.q.out @@ -41,6 +41,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -104,8 +107,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +150,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join6.q.out b/ql/src/test/results/clientpositive/spark/auto_join6.q.out index 4f1e631..2033b05 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), 
Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join7.q.out b/ql/src/test/results/clientpositive/spark/auto_join7.q.out index ef99e51..6f2ea1d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -50,12 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +78,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +96,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column 
stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +138,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +179,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join8.q.out b/ql/src/test/results/clientpositive/spark/auto_join8.q.out index 6909113..02d956a 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join8.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join8.q.out @@ -41,13 +41,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -68,6 +69,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -90,7 +93,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col2 is null (type: boolean) @@ -107,8 +110,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + 
outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -123,6 +153,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/auto_join9.q.out b/ql/src/test/results/clientpositive/spark/auto_join9.q.out index b2ed51c..0187f42 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join9.q.out @@ -19,13 +19,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src2 @@ -46,6 +47,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,7 +71,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col4 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: UDFToInteger(_col0) (type: int), _col4 (type: string) @@ -82,8 +85,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -98,6 +128,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out index 4527b35..7cc0f70 100644 --- a/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_join_reordering_values.q.out @@ -126,7 +126,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -147,7 +147,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -199,7 +199,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -220,7 +220,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -272,7 +272,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -293,7 +293,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -345,7 +345,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -366,7 +366,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}} bucket_count -1 column.name.delimiter , columns dealid,date,time,cityid,userid @@ -418,7 +418,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid @@ -439,7 +439,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}} bucket_count -1 column.name.delimiter , columns userid diff --git a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out index 602ccb2..1dd6b5d 100644 --- a/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -353,7 +353,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col3 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1327,15 +1327,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -1371,6 +1376,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + 
outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) File Output Operator compressed: false Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1379,6 +1397,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1393,6 +1476,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -1525,6 +1622,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -1532,10 +1631,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -1564,6 +1665,39 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator expressions: _col0 (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE @@ -1582,6 +1716,20 @@ STAGE PLANS: Reducer 2 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int) mode: mergepartial @@ -1599,6 +1747,33 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1613,6 +1788,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out index cd4b83a..b303f7c 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_13.q.out @@ -70,15 +70,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -110,6 +115,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -122,6 +140,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -136,6 +219,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -255,15 +352,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 
(GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: a @@ -295,6 +397,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -307,6 +422,71 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Sorted Merge Bucket Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -321,6 +501,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -438,9 +632,12 @@ INSERT OVERWRITE TABLE dest2 select value1, value2 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4 + Stage-8 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -449,7 +646,30 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Local Work: + Map Reduce Local Work + + Stage: Stage-6 + Spark +#### A masked pattern was here #### + Vertices: + Map 4 Map Operator Tree: TableScan alias: b @@ -470,9 +690,12 @@ STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: a @@ -492,7 +715,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 4 Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col2 (type: int) @@ -506,6 +729,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Select Operator expressions: _col1 (type: string), _col3 (type: string) outputColumnNames: _col0, _col1 @@ -520,6 +756,75 @@ STAGE PLANS: name: default.dest2 Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 10 Data size: 70 Basic stats: 
COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string), _col3 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: k1, k2 + Statistics: Num rows: 11 Data size: 77 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -534,6 +839,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: k1, k2 + Column Types: string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -581,28 +900,6 @@ POSTHOOK: query: select * from dest1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest1 #### A masked pattern was here #### -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -2 2 -4 4 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -5 5 -8 8 -9 9 PREHOOK: query: select * from dest2 PREHOOK: type: QUERY PREHOOK: Input: default@dest2 @@ -611,25 +908,3 @@ POSTHOOK: query: select * from dest2 POSTHOOK: type: QUERY POSTHOOK: Input: default@dest2 #### A masked pattern was here #### -val_0 val_0 -val_0 val_0 -val_0 
val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_0 val_0 -val_2 val_2 -val_4 val_4 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_5 val_5 -val_8 val_8 -val_9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out index de08021..f3d4b7e 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_6.q.out @@ -82,8 +82,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -98,20 +99,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -128,6 +122,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -136,6 +147,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ 
-146,7 +172,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -192,8 +218,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -208,20 +235,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: d @@ -238,6 +258,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -246,6 +283,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -256,7 +308,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -302,8 +354,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 
<- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -318,20 +371,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: h @@ -348,6 +394,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -356,6 +419,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -366,7 +444,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -412,8 +490,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was 
here #### Vertices: Map 1 @@ -428,20 +507,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -458,6 +530,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -466,6 +555,21 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -476,7 +580,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -601,8 +705,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -617,20 +722,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - 
condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -647,6 +745,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -655,6 +770,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -665,7 +795,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -711,8 +841,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -727,20 +858,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: 
string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -757,6 +881,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -765,6 +906,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -775,7 +931,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -821,8 +977,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -837,20 +994,13 @@ STAGE PLANS: expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: UDFToDouble(_col0) (type: double) - sort order: + - Map-reduce partition columns: UDFToDouble(_col0) (type: double) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: UDFToDouble(_col0) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(_col0) (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -867,6 +1017,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: UDFToDouble(_col0) (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -875,6 +1042,21 @@ STAGE PLANS: keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -885,7 +1067,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -1010,8 +1192,9 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 5 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1026,20 +1209,13 @@ STAGE PLANS: expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Sorted Merge Bucket Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col1 - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map 4 + Reduce Output Operator + key expressions: _col1 (type: string) + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Map 5 Map Operator Tree: TableScan alias: c @@ -1056,6 +1232,23 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
Map 6 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -1064,6 +1257,21 @@ STAGE PLANS: keys: 0 _col1 (type: string) 1 _col0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count() @@ -1074,7 +1282,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 3 + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) diff --git a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out index cdb69db..d15d9c1 100644 --- a/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out +++ b/ql/src/test/results/clientpositive/spark/auto_sortmerge_join_9.q.out @@ -504,7 +504,7 @@ STAGE PLANS: 1 Reducer 4 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -2128,7 +2128,7 @@ STAGE PLANS: 1 Reducer 5 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + expressions: _col0 (type: int), _col3 (type: bigint), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 5 Data size: 38 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket2.q.out b/ql/src/test/results/clientpositive/spark/bucket2.q.out index 540e6f8..56dfc95 100644 --- a/ql/src/test/results/clientpositive/spark/bucket2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -135,6 +136,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -169,6 +205,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket3.q.out b/ql/src/test/results/clientpositive/spark/bucket3.q.out index 078460f..4c9fe02 100644 --- a/ql/src/test/results/clientpositive/spark/bucket3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket3.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -133,6 +135,60 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -166,6 +222,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket4.q.out b/ql/src/test/results/clientpositive/spark/bucket4.q.out index d944a5e..7811156 100644 --- a/ql/src/test/results/clientpositive/spark/bucket4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -137,6 +138,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -172,6 +208,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket4_1 + Is Table Level Stats: true + 
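The recurring pattern across these hunks — a new "Column Stats Work" stage fed by a compute_stats Group By/Reduce pair hanging off the insert's Select Operator — is consistent with Hive's automatic column-statistics gathering being enabled for INSERT queries. A minimal sketch of how the bucket4_1 plan above might be surfaced, assuming hive.stats.column.autogather is the switch in play (the diff itself does not name the setting):

    -- assumed repro, not part of the generated output:
    -- enable automatic column stats collection on INSERT
    SET hive.stats.column.autogather=true;
    -- EXPLAIN EXTENDED shows the extra compute_stats branch
    -- and the trailing Column Stats Work stage
    EXPLAIN EXTENDED INSERT OVERWRITE TABLE bucket4_1 SELECT * FROM src;

With the property off, the plan would be expected to end at the Stats-Aggr Operator stage, without the compute_stats aggregations or the Column Stats Work stage seen throughout this diff.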
PREHOOK: query: insert overwrite table bucket4_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/bucket5.q.out b/ql/src/test/results/clientpositive/spark/bucket5.q.out index 1d0438c..9a3db1c 100644 --- a/ql/src/test/results/clientpositive/spark/bucket5.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket5.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -109,7 +113,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -220,9 +224,55 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Reducer 3 Needs Tagging: false Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: Select Operator expressions: UDFToInteger(KEY.reducesinkkey0) (type: int), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -259,6 +309,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 5 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -294,6 +390,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucketed_table + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.unbucketed_table + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -362,7 +474,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} SORTBUCKETCOLSPREFIX TRUE numFiles 2 numRows 500 diff --git a/ql/src/test/results/clientpositive/spark/bucket6.q.out b/ql/src/test/results/clientpositive/spark/bucket6.q.out index d5d53d3..c93bf3f 100644 --- a/ql/src/test/results/clientpositive/spark/bucket6.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket6.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +55,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_bucket + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -67,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_bucket + PREHOOK: query: insert into table src_bucket select key,value from srcpart PREHOOK: type: QUERY PREHOOK: Input: default@srcpart diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out index 0fcf844..836bb47 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark1.q.out @@ -121,13 +121,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -209,6 +210,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -233,7 +236,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -273,6 +276,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -333,6 +352,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -366,6 +415,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -428,13 +485,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -511,6 +569,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -535,7 +595,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -553,7 +613,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -574,6 +634,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -629,6 +705,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -639,7 +745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -662,6 +768,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out index 01f1003..6e999ea 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark2.q.out @@ -105,13 +105,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -193,6 +194,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -217,7 +220,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -257,6 +260,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + 
tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +336,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -350,6 +399,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -412,13 +469,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -500,6 +558,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -524,7 +584,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -543,7 +603,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -564,6 +624,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -624,6 +700,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -634,7 +740,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -657,6 +763,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out index 244ac03..3d6dd71 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark3.q.out @@ -105,6 +105,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -193,6 +194,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -257,6 +260,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 
16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -317,6 +336,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -350,6 +399,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -412,6 +469,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -495,6 +553,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -537,7 +597,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -558,6 +618,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -613,6 +689,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -623,7 +729,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -646,6 +752,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out index bd67ee3..d3c9893 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_spark4.q.out @@ -111,7 +111,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -134,7 +134,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name 
key @@ -187,7 +187,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -210,7 +210,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -317,7 +317,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -340,7 +340,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -490,7 +490,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -513,7 +513,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -566,7 +566,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -589,7 +589,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -690,7 +690,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -713,7 +713,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out index 08d115d..a5d8970 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez1.q.out @@ -350,7 +350,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan alias: a @@ -390,10 +390,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 2 + Map 1 Map Operator Tree: TableScan alias: b @@ -411,31 +411,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 0 Map 1 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 1 Map 4 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -503,36 +507,36 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 1 + Map 3 Map Operator Tree: TableScan - alias: d - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) 1 _col0 (type: int) Local Work: Map Reduce Local Work - Map 2 + Map 4 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + alias: d + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: 0 _col0 (type: int) @@ -543,10 +547,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 1 Map Operator Tree: TableScan alias: b @@ -564,31 +568,35 @@ STAGE PLANS: keys: 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 input vertices: - 0 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - input vertices: - 0 Map 1 - Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: + Select Operator + expressions: _col1 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + input vertices: + 1 Map 4 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) Local Work: Map Reduce Local Work - Reducer 4 + Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -764,7 +772,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: b @@ -778,8 +786,8 @@ STAGE PLANS: Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Local Work: Map Reduce Local Work @@ -787,7 +795,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: tab @@ -809,10 +817,10 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 2 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: tab_part @@ -832,7 +840,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int), substr(_col2, 5) (type: string) @@ -852,7 +860,7 @@ STAGE PLANS: value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - 
Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -870,14 +878,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Map 4 + 0 Map 1 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + expressions: _col0 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 302 Data size: 3213 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -915,7 +923,7 @@ STAGE PLANS: Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -937,10 +945,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Map 2 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 2 Map Operator Tree: TableScan alias: x @@ -960,7 +968,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 266 Data size: 2822 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), substr(_col1, 5) (type: string) @@ -980,7 +988,7 @@ STAGE PLANS: value expressions: _col1 (type: double) Local Work: Map Reduce Local Work - Reducer 2 + Reducer 3 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -996,14 +1004,14 @@ STAGE PLANS: Statistics: Num rows: 133 Data size: 1411 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: int) - 1 _col0 (type: int) + 0 _col0 (type: int) + 1 _col1 (type: int) Stage: Stage-1 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 1 Map Operator Tree: TableScan alias: b @@ -1019,14 +1027,14 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + 0 _col0 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col1, _col2, _col3 input vertices: - 0 Reducer 2 + 1 Reducer 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: int), _col0 (type: double), _col3 (type: string) + expressions: _col3 (type: int), _col2 (type: double), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1066,14 +1074,14 @@ STAGE PLANS: Map 2 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -1085,14 +1093,14 @@ STAGE PLANS: Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: 
NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 242 Data size: 2566 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: @@ -1126,13 +1134,13 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) 2 _col0 (type: int) - outputColumnNames: _col0, _col1, _col3 + outputColumnNames: _col0, _col1, _col4 input vertices: 1 Map 2 2 Map 3 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col3 (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col4 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -1671,19 +1679,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -1706,28 +1714,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -1736,22 +1745,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 
(type: string) - 1 _col1 (type: string) - outputColumnNames: _col0, _col3 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 input vertices: 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out index 9d6b523..fc32ce3 100644 --- a/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucket_map_join_tez2.q.out @@ -125,19 +125,19 @@ STAGE PLANS: Map 4 Map Operator Tree: TableScan - alias: b + alias: c Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: value is not null (type: boolean) + predicate: key is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Spark HashTable Sink Operator keys: - 0 _col1 (type: string) - 1 _col1 (type: string) + 0 _col0 (type: int) + 1 _col0 (type: int) Local Work: Map Reduce Local Work @@ -160,28 +160,29 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: string) + value expressions: _col0 (type: int) Map 3 Map Operator Tree: TableScan - alias: c + alias: b Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: key is not null (type: boolean) + predicate: value is not null (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: string) sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reducer 2 Local Work: Map Reduce Local Work @@ -190,22 +191,22 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + 0 _col1 (type: string) + 1 _col1 (type: string) + outputColumnNames: _col0, _col2 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: string) - 1 _col1 (type: string) - outputColumnNames: 
_col0, _col3 + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col2 input vertices: 1 Map 4 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col0 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out index 1b47864..57e87de 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin1.q.out @@ -378,12 +378,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -460,7 +462,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -583,6 +585,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -616,6 +664,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column 
Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -733,12 +789,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -815,7 +873,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -917,7 +975,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -938,6 +996,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -948,7 +1052,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -971,6 +1075,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out index 9e44d1a..72a093e 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin13.q.out @@ -110,7 +110,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -205,7 +205,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name value column.name.delimiter , @@ -254,7 +254,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -409,7 +409,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -510,7 +510,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -670,7 +670,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -771,7 +771,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -931,7 +931,7 @@ STAGE PLANS: partition values: part 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , @@ -1032,7 +1032,7 @@ STAGE PLANS: partition values: part 2 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count 2 bucket_field_name key column.name.delimiter , diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out index 58a2935..aabb01a 100644 --- 
a/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin2.q.out @@ -104,12 +104,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -188,7 +190,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -311,6 +313,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -344,6 +392,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ -465,12 +521,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL 
SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -549,7 +607,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -651,7 +709,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -672,6 +730,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -682,7 +786,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -705,6 +809,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b @@ 
-843,12 +955,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -927,7 +1041,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -1078,7 +1192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1099,6 +1213,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1109,7 +1269,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -1132,6 +1292,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + 
Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out index 6ac97a1..073f2a9 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin3.q.out @@ -128,12 +128,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -212,7 +214,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -335,6 +337,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -368,6 +416,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: 
true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b @@ -489,12 +545,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -573,7 +631,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin_part_2 Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -675,7 +733,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -696,6 +754,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -706,7 +810,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 
column.name.delimiter , columns key,value1,value2 @@ -729,6 +833,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out index c1ea0ab..399b006 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin4.q.out @@ -128,12 +128,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -210,7 +212,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -331,6 +333,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -364,6 +412,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + 
Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(b)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b @@ -473,12 +529,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -555,7 +613,7 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -655,7 +713,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -676,6 +734,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 28 Data size: 3025 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -686,7 +790,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -709,6 +813,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out index fc95d13..a68163b 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin5.q.out @@ -179,6 +179,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -261,6 +262,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -321,6 +324,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -430,6 +449,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part/ds=2008-04-08 [b] /srcbucket_mapjoin_part/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -463,6 +512,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### 
+ Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part b @@ -585,6 +642,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -667,6 +725,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -706,7 +766,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -727,6 +787,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -836,6 +912,36 @@ STAGE PLANS: Truncated Path -> Alias: /srcbucket_mapjoin_part_2/ds=2008-04-08 [b] /srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -846,7 +952,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}} bucket_count -1 column.name.delimiter , columns key,value1,value2 @@ -869,6 +975,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result select /*+mapjoin(a)*/ a.key, a.value, b.value from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out index f4537ff..2637c79 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative.q.out @@ -80,13 +80,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -159,6 +160,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -179,7 +182,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -218,6 +221,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -271,6 +290,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + 
serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -304,3 +353,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + diff --git a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out index 50a726e..fa1b057 100644 --- a/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketmapjoin_negative2.q.out @@ -89,13 +89,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: b @@ -222,6 +223,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -242,7 +245,7 @@ STAGE PLANS: 1 key (type: int) outputColumnNames: _col0, _col1, _col6 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE BucketMapJoin: true @@ -282,6 +285,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Bucket Mapjoin Context: @@ -340,6 +359,36 @@ STAGE PLANS: name: default.srcbucket_mapjoin Truncated Path -> Alias: /srcbucket_mapjoin [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -373,3 +422,11 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.bucketmapjoin_tmp_result + Is Table Level Stats: true + diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out index 81a064b..e0706ea 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_2.q.out @@ -93,6 +93,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -122,6 +123,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -172,6 +174,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -188,6 +225,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -269,6 +313,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: 
Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -298,6 +343,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -348,6 +394,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -364,6 +445,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -469,6 +557,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -498,6 +587,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -548,6 +638,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort 
order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 184 Data size: 1619 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -564,6 +689,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -675,6 +807,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -704,6 +837,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -754,6 +888,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -770,6 +939,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -863,6 +1039,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -892,6 +1069,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -942,6 +1120,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -958,6 +1171,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.v1, b.v2) FROM @@ -1051,6 +1271,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1080,6 +1301,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1130,6 +1352,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1146,6 +1403,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key+a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out index 4245aa1..f23579d 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_4.q.out @@ -69,6 +69,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -94,6 +95,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -140,6 +142,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, key2, value + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + 
Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -156,6 +193,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -256,6 +300,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -285,6 +330,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -331,6 +377,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -347,6 +428,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out index 5e4e5ef..66fc9ac 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_6.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -100,6 +101,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +152,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -166,6 +203,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = 
'1') SELECT a.key, a.key2, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -254,6 +298,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -283,6 +328,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -333,6 +379,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -349,6 +430,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key, subq1.key2, subq1.value from ( @@ -437,6 +525,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -466,6 +555,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +606,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 
Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -532,6 +657,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq1.key2, subq1.key, subq1.value from @@ -555,6 +687,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -584,6 +717,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -634,6 +768,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: 
mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -650,6 +819,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: EXPLAIN INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from @@ -679,6 +855,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -708,6 +885,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -758,6 +936,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -774,6 +987,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + 
Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.key, subq2.key2, subq2.value from ( @@ -880,6 +1100,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -909,6 +1130,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -959,6 +1181,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -975,6 +1232,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT subq2.k2, subq2.k1, subq2.value from ( @@ -1091,6 +1355,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1120,6 +1385,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1170,6 +1436,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table4 + Select 
Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 1076 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 538 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1186,3 +1487,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table4 + diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out index 7c23da7..aab48eb 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_7.q.out @@ -71,6 +71,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -100,6 +101,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -150,6 +152,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data 
size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -166,6 +203,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -251,6 +295,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -280,6 +325,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -330,6 +376,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: 
Stage-0 Move Operator @@ -346,6 +427,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM @@ -437,6 +525,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -466,6 +555,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -516,6 +606,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 30 Data size: 269 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 15 Data size: 134 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -532,6 +657,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, concat(a.value, b.value) FROM diff --git a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out index 8263b6f..2c0be3f 100644 --- a/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out +++ b/ql/src/test/results/clientpositive/spark/bucketsortoptimize_insert_8.q.out @@ -69,6 +69,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -98,6 +99,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- 
Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -148,6 +150,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,6 +201,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.key, b.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b @@ -246,6 +290,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -275,6 +320,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (PARTITION-LEVEL SORT, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -325,6 +371,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string) + outputColumnNames: key, key2, value, ds + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 92 Data size: 809 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 46 Data size: 404 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -341,6 +422,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, key2, value + Column Types: int, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT b.key, a.key, concat(a.value, b.value) FROM test_table1 a JOIN test_table2 b diff --git a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out index 5dd79de..0dd6ddd 100644 --- a/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out +++ b/ql/src/test/results/clientpositive/spark/disable_merge_for_bucketing.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -135,6 +136,41 @@ STAGE PLANS: TotalFiles: 2 GatherStats: true MultiFileSpray: true + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -169,6 +205,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket2_1 + Is Table Level Stats: true + PREHOOK: query: insert overwrite table bucket2_1 select * from src PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out index 6572511..6c0ece6 100644 --- a/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out +++ b/ql/src/test/results/clientpositive/spark/dynamic_rdd_cache.q.out @@ -176,6 +176,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -183,11 +185,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -228,9 +232,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -248,6 +279,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -262,6 +320,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -335,14 +407,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -361,7 +435,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -377,7 +451,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -416,7 +490,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 
Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -439,7 +540,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -462,6 +576,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -476,6 +603,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: SELECT * FROM tmptable x SORT BY x.key PREHOOK: type: QUERY PREHOOK: Input: default@tmptable diff --git a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out index 52aa055..8c680bf 100644 --- a/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out +++ b/ql/src/test/results/clientpositive/spark/filter_join_breaktask.q.out @@ -73,7 +73,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -148,7 +148,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -222,7 +222,7 @@ STAGE PLANS: partition values: ds 2008-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value diff --git a/ql/src/test/results/clientpositive/spark/groupby10.q.out b/ql/src/test/results/clientpositive/spark/groupby10.q.out index b572995..c646733 100644 --- a/ql/src/test/results/clientpositive/spark/groupby10.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -44,6 +44,8 @@ STAGE 
DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -51,13 +53,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -105,9 +111,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -119,7 +159,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -139,6 +179,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -153,6 +227,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -256,6 +344,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -263,13 +353,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP, 2) + Reducer 9 <- Reducer 8 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: input @@ -317,9 +411,43 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), 
compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: int) mode: partial1 @@ -331,7 +459,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double), _col2 (type: double) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0), sum(VALUE._col1) @@ -351,6 +479,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -365,6 +527,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 
+ Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -468,6 +644,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -475,7 +653,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -492,7 +673,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: int) Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE @@ -514,6 +723,14 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) keys: KEY._col0 (type: int) @@ -532,6 +749,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0), avg(DISTINCT KEY._col1:1._col0) + keys: KEY._col0 (type: int) 
+ mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), val1 (type: int), val2 (type: int) Stage: Stage-0 Move Operator @@ -546,6 +785,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby11.q.out b/ql/src/test/results/clientpositive/spark/groupby11.q.out index a0f99c4..2e01db8 100644 --- a/ql/src/test/results/clientpositive/spark/groupby11.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,13 +41,17 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 2) Reducer 5 <- Reducer 4 (GROUP, 2) + Reducer 6 <- Map 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 2) + Reducer 8 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 8 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 6 + Map 10 Map Operator Tree: TableScan alias: src @@ -59,7 +65,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 11 Map Operator Tree: TableScan alias: src @@ -107,9 +113,52 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + 
outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator aggregations: count(KEY._col1:0._col0), count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: partial1 @@ -121,7 +170,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) - Reducer 5 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -141,6 +190,49 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), val1 (type: int), val2 (type: int) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + keys: '111' (type: string) + mode: partial1 + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '111' (type: string) + sort order: + + Map-reduce partition columns: '111' (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '111' (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: 
NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '111' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -157,6 +249,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out index b414aa6..9556c82 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out 
index a01cee1..b9f06f4 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +108,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out index f7b7f7a..22d0084 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -23,6 +24,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +82,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +123,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out index 1b7e53b..55270c4 100644 --- a/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out @@ -16,12 +16,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,6 +61,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g1 + Select Operator + expressions: _col0 (type: int), _col1 (type: double) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +97,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, double + Table: default.dest_g1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby2.q.out b/ql/src/test/results/clientpositive/spark/groupby2.q.out index a5cd0e6..bea7096 100644 --- 
a/ql/src/test/results/clientpositive/spark/groupby2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +78,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: partial1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out index d2b69af..5f76784 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,33 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out index 4ad0568..927e84a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,6 +69,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: 
struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -81,6 +110,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY @@ -126,12 +162,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -175,6 +213,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2420 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -189,6 +254,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: 
string, int, string, int, int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out index f4a567e..cac24f9 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -82,6 +84,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,6 +125,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out index 8ecf769..507fea1 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP 
PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +62,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +98,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out index 3ede0fc..deabca8 100644 --- a/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,6 +63,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data 
size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +99,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby3.q.out b/ql/src/test/results/clientpositive/spark/groupby3.q.out index 23871ba..9b184ec 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -43,6 +44,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +70,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: string), _col4 (type: string), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) Reducer 3 @@ -89,6 +92,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1208 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 4452 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -103,6 +133,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out index 71f8dc0..465ccce 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -83,6 +84,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1216 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column 
Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out index 47ef5cb..c1ee7c7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -87,6 +88,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5412 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -101,6 +122,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out index 7cfca81..0783098 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE 
PLANS: Stage: Stage-1 @@ -96,6 +97,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4532 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -110,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out index b2993a6..046325f 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew.q.out @@ -36,6 +36,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -76,6 +77,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Statistics: Num rows: 1 Data size: 424 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num 
rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4488 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -90,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9 + Column Types: double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out index d152a07..5c998d0 100644 --- a/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby3_noskew_multi_distinct.q.out @@ -40,6 +40,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -80,6 +81,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Statistics: Num rows: 1 Data size: 440 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 5448 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +111,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 + Column Types: double, double, double, double, double, double, double, double, double, double, double + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(substr(src.value,5)), diff --git a/ql/src/test/results/clientpositive/spark/groupby4.q.out b/ql/src/test/results/clientpositive/spark/groupby4.q.out index 3ad01d0..1f2cf2e 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +71,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +119,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby4_map.q.out b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out index 39536bb..61cdbb7 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE 
Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out index 535e770..84f79d9 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -59,6 +60,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -73,6 +94,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out index 04f58fa..6f293bd 100644 --- a/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +57,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column 
stats: NONE + value expressions: c1 (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby5.q.out b/ql/src/test/results/clientpositive/spark/groupby5.q.out index d292f74..a8584824 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5.q.out @@ -22,6 +22,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -29,6 +30,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -80,6 +83,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +131,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + 
Table: default.dest1 + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_map.q.out b/ql/src/test/results/clientpositive/spark/groupby5_map.q.out index add3094..7ce80d9 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_map.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +96,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out index 924ef5d..973b932 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -61,6 +62,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -75,6 +96,13 @@ 
STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) PREHOOK: type: QUERY PREHOOK: Input: default@src diff --git a/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out index 300ccb6..e42bc98 100644 --- a/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -65,6 +67,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int), value (type: string) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) FROM src diff --git a/ql/src/test/results/clientpositive/spark/groupby6.q.out b/ql/src/test/results/clientpositive/spark/groupby6.q.out index 4f406d7..27878df 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,8 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -68,6 +71,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition 
columns: rand() (type: double) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +119,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_map.q.out b/ql/src/test/results/clientpositive/spark/groupby6_map.q.out index 03f68c6..3c83ceb 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_map.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -60,6 +62,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +103,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM 
src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out index 606b5d5..0df968d 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -25,6 +26,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -73,6 +75,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -87,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out index eb72f01..8e76e98 100644 --- a/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -55,6 +57,28 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Reducer 
3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +93,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out index 31daab8..e732ee2 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 31) - Reducer 3 <- Map 4 (GROUP, 31) + Reducer 2 <- Map 6 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 31) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -80,9 +84,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -100,6 +131,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + 
expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -114,6 +172,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out index 625a737..30f9af3 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map_multi_single_reducer.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) #### A masked pattern was here #### Vertices: Map 1 @@ -53,7 +58,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: substr(value, 5) (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: 
mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -75,6 +108,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -93,6 +139,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -107,6 +180,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out index 4fbfd30..9bcf4fc 100644 --- a/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby7_map_skew.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is 
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,12 +37,14 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31)
 Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 5 <- Reducer 2 (GROUP, 31)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 6 <- Reducer 2 (GROUP, 31)
+ Reducer 7 <- Reducer 6 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 6
+ Map 8
 Map Operator Tree:
 TableScan
 alias: src
@@ -95,7 +99,34 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
- Reducer 5
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
 Reduce Operator Tree:
 Group By Operator
 aggregations: sum(VALUE._col0)
@@ -115,6 +146,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -129,6 +187,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
index a26247a..7158f48 100644
--- a/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,11 +37,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP, 31)
- Reducer 3 <- Map 4 (GROUP, 31)
+ Reducer 2 <- Map 6 (GROUP, 31)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 6 (GROUP, 31)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -74,9 +78,31 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: sum(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
@@ -94,6 +120,28 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -108,6 +156,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
index 2dce301..150de1f 100644
--- a/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,9 +37,11 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 3 <- Reducer 5 (SORT, 1)
- Reducer 4 <- Reducer 5 (SORT, 1)
- Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 3 <- Reducer 7 (SORT, 1)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 5 <- Reducer 7 (SORT, 1)
+ Reducer 6 <- Reducer 5 (GROUP, 1)
+ Reducer 7 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
 #### A masked pattern was here ####
 Vertices:
 Map 1
@@ -76,8 +80,30 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
 Reducer 4
 Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Reduce Operator Tree:
 Select Operator
 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double)
 outputColumnNames: _col0, _col1
@@ -97,7 +123,29 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
- Reducer 5
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 6
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: true
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
 Reduce Operator Tree:
 Forward
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -127,6 +175,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby8.q.out b/ql/src/test/results/clientpositive/spark/groupby8.q.out
index f7f3279..79bb6ab 100644
--- a/ql/src/test/results/clientpositive/spark/groupby8.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby8.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,12 +37,16 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2)
 Reducer 3 <- Reducer 2 (GROUP, 2)
- Reducer 5 <- Reducer 2 (GROUP, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 7 <- Reducer 2 (GROUP, 2)
+ Reducer 8 <- Reducer 7 (GROUP, 2)
+ Reducer 9 <- Reducer 8 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 6
+ Map 10
 Map Operator Tree:
 TableScan
 alias: src
@@ -88,9 +94,43 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: partial1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 5
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: final
@@ -108,6 +148,40 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: partial1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -122,6 +196,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -801,6 +889,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -808,12 +898,16 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2)
 Reducer 3 <- Reducer 2 (GROUP, 2)
- Reducer 5 <- Reducer 2 (GROUP, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
+ Reducer 7 <- Reducer 2 (GROUP, 2)
+ Reducer 8 <- Reducer 7 (GROUP, 2)
+ Reducer 9 <- Reducer 8 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 6
+ Map 10
 Map Operator Tree:
 TableScan
 alias: src
@@ -861,9 +955,43 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: partial1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 5
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: final
@@ -881,6 +1009,40 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Map-reduce partition columns: rand() (type: double)
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
+ Reducer 8
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: partial1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 9
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -895,6 +1057,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby8_map.q.out b/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
index 288ca3f..fa911bc 100644
--- a/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby8_map.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,7 +37,10 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
 #### A masked pattern was here ####
 Vertices:
 Map 1
@@ -52,7 +57,35 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: key (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
 Reduce Operator Tree:
 Forward
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -74,6 +107,19 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0)
 keys: KEY._col0 (type: string)
@@ -92,6 +138,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Stage: Stage-0
 Move Operator
@@ -106,6 +179,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
index 9e76fd5..0b3520c 100644
--- a/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,12 +37,14 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 31)
 Reducer 3 <- Reducer 2 (GROUP, 31)
- Reducer 5 <- Reducer 2 (GROUP, 31)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 6 <- Reducer 2 (GROUP, 31)
+ Reducer 7 <- Reducer 6 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 6
+ Map 8
 Map Operator Tree:
 TableScan
 alias: src
@@ -94,7 +98,34 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
- Reducer 5
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 6
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(VALUE._col0)
@@ -114,6 +145,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reducer 7
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: final
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -128,6 +186,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out b/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
index 288ca3f..c412709 100644
--- a/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,7 +37,10 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 3 <- Reducer 5 (GROUP, 1)
+ Reducer 4 <- Reducer 6 (GROUP, 1)
+ Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+ Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
 #### A masked pattern was here ####
 Vertices:
 Map 1
@@ -52,7 +57,35 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: key (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Reducer 2
+ Reducer 3
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
 Reduce Operator Tree:
 Forward
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -74,6 +107,14 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0)
 keys: KEY._col0 (type: string)
@@ -92,6 +133,28 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Reducer 6
+ Reduce Operator Tree:
+ Forward
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: count(DISTINCT KEY._col1:0._col0)
+ keys: KEY._col0 (type: string)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(_col0) (type: int), UDFToString(_col1) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ value expressions: key (type: int), value (type: string)
 Stage: Stage-0
 Move Operator
@@ -106,6 +169,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby9.q.out b/ql/src/test/results/clientpositive/spark/groupby9.q.out
index d59d8cf..4f2e655 100644
--- a/ql/src/test/results/clientpositive/spark/groupby9.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby9.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -35,11 +37,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -59,7 +63,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map 5
+ Map 7
 Map Operator Tree:
 TableScan
 alias: src
@@ -99,9 +103,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
@@ -119,6 +150,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -133,6 +191,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -813,6 +885,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -820,11 +894,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -844,7 +920,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map 5
+ Map 7
 Map Operator Tree:
 TableScan
 alias: src
@@ -884,9 +960,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
@@ -904,6 +1007,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -918,6 +1048,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -1598,6 +1742,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -1605,11 +1751,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -1629,7 +1777,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map 5
+ Map 7
 Map Operator Tree:
 TableScan
 alias: src
@@ -1669,9 +1817,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
@@ -1689,6 +1864,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -1703,6 +1905,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -2383,6 +2599,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -2390,11 +2608,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP, 2)
- Reducer 3 <- Map 5 (GROUP, 2)
+ Reducer 2 <- Map 6 (GROUP, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 7 (GROUP, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -2415,7 +2635,7 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col1 (type: bigint)
- Map 5
+ Map 7
 Map Operator Tree:
 TableScan
 alias: src
@@ -2456,9 +2676,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(VALUE._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
@@ -2476,6 +2723,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -2490,6 +2764,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
@@ -3170,6 +3458,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -3177,11 +3467,13 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 3 <- Reducer 2 (GROUP, 1)
+ Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 4
+ Map 6
 Map Operator Tree:
 TableScan
 alias: src
@@ -3201,7 +3493,7 @@ STAGE PLANS:
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
- Map 5
+ Map 7
 Map Operator Tree:
 TableScan
 alias: src
@@ -3241,9 +3533,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
 Reducer 3
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 4
+ Reduce Operator Tree:
+ Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
@@ -3261,6 +3580,33 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.dest2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, val1, val2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ Reducer 5
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Stage: Stage-0
 Move Operator
@@ -3275,6 +3621,20 @@ STAGE PLANS:
 Stage: Stage-3
 Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, val1, val2
+ Column Types: int, string, string
+ Table: default.dest2
+
 Stage: Stage-1
 Move Operator
 tables:
diff --git a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
index cace096..43ab17a 100644
--- a/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
+++ b/ql/src/test/results/clientpositive/spark/groupby_cube1.q.out
@@ -567,6 +567,8 @@ STAGE DEPENDENCIES:
 Stage-2 is a root stage
 Stage-0 depends on stages: Stage-2
 Stage-3 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-3, Stage-4
+ Stage-6 depends on stages: Stage-3, Stage-4
 Stage-1 depends on stages: Stage-2
 Stage-4 depends on stages: Stage-1
@@ -574,13 +576,15 @@ STAGE PLANS:
 Stage: Stage-2
 Spark
 Edges:
- Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2)
 Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
- Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 4 <- Reducer 3 (GROUP, 1)
+ Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2)
+ Reducer 7 <- Reducer 6 (GROUP, 1)
 #### A masked pattern was here ####
 Vertices:
- Map 6
+ Map 8
 Map Operator Tree:
 TableScan
 alias: t1
@@ -601,7 +605,7 @@ STAGE PLANS:
 Map-reduce partition columns: rand() (type: double)
 Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col3 (type: bigint)
- Map 7
+ Map 9
 Map Operator Tree:
 TableScan
 alias: t1
@@ -657,9 +661,36 @@ STAGE PLANS:
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: default.t2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int)
+ outputColumnNames: key1, key2, val
+ Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
 Reducer 4
 Reduce Operator Tree:
 Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -671,7 +702,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 4 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -692,6 +723,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 2 Data size: 60 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -706,6 +764,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out index 34a01e2..20eda16 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### 
Vertices: Map 1 @@ -204,6 +206,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -237,6 +285,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out index d4fd0bb..bc74739 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_map_ppr_multi_distinct.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -204,6 +206,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -237,6 +285,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out index bf4132a..c595a48 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -59,7 +63,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -99,9 +103,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -119,6 +150,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -133,6 +191,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out 
b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out index c16df1b..fec6003 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer.q.out @@ -54,6 +54,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-2 depends on stages: Stage-3 Stage-4 depends on stages: Stage-2 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-0 depends on stages: Stage-3 Stage-5 depends on stages: Stage-0 Stage-1 depends on stages: Stage-3 @@ -63,7 +66,12 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 6 (GROUP, 1) + Reducer 4 <- Reducer 7 (GROUP, 1) + Reducer 5 <- Reducer 8 (GROUP, 1) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -81,7 +89,49 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ 
-103,6 +153,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -145,6 +208,66 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 7 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: 
complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Stage: Stage-2 Move Operator @@ -159,6 +282,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + Stage: Stage-0 Move Operator tables: @@ -282,6 +426,11 @@ STAGE DEPENDENCIES: Stage-5 is a root stage Stage-2 depends on stages: Stage-5 Stage-6 depends on stages: Stage-2 + Stage-11 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-13 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-14 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 + Stage-15 depends on stages: Stage-6, Stage-7, Stage-8, Stage-9, Stage-10 Stage-0 depends on stages: Stage-5 Stage-7 depends on stages: Stage-0 Stage-1 depends on stages: Stage-5 @@ -295,12 +444,20 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 2 <- Map 5 (PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (SORT, 1) + Reducer 12 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 14 <- Map 10 (PARTITION-LEVEL SORT, 2) + Reducer 15 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 16 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 12 (GROUP, 1) + Reducer 4 <- Reducer 13 (GROUP, 1) + Reducer 5 <- Reducer 14 (GROUP, 1) + Reducer 7 <- Reducer 15 (SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) + Reducer 9 <- Reducer 16 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src @@ -315,7 +472,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Map 6 + Map 11 Map Operator Tree: TableScan alias: 
src @@ -330,7 +487,7 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string), substr(key, 2, 1) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -352,6 +509,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE @@ -394,7 +564,67 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 - Reducer 3 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 14 + Reduce Operator Tree: + Forward + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 15 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -412,7 +642,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) @@ -435,8 +664,80 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h3 + Reducer 16 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 >= 5) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column 
stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 @@ -456,6 +757,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_h2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: 
hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2424 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -470,6 +812,41 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g2 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g3 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_g4 + + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h2 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest_h3 + Stage: Stage-0 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out index 37deb93..0913b14 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer2.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,7 +37,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 
1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +57,35 @@ STAGE PLANS: Map-reduce partition columns: substr(key, 1, 1) (type: string) Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE @@ -77,6 +110,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g2 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, c1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 5) (type: boolean) Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +144,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_g3 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 5) (type: boolean) + Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: 
_col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -112,6 +188,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, c1 + Column Types: string, int + Table: default.dest_g2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, int + Table: default.dest_g3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out index b612747..b73e139 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_multi_single_reducer3.q.out @@ -40,6 +40,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -47,7 +49,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -64,7 +69,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -89,6 +122,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -110,6 +156,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -124,6 +200,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -212,6 +302,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -219,7 +311,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 
(PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -236,7 +331,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -261,6 +384,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -282,6 +418,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -296,6 +462,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -384,6 +564,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -391,7 +573,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -408,7 +593,35 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE @@ -433,6 +646,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE @@ -454,6 +680,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((VALUE._col0) IN ('val_400', 'val_500') and (KEY._col0) IN (400, 450)) (type: boolean) + Statistics: Num rows: 63 Data size: 669 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 31 Data size: 329 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -468,6 +724,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: @@ -556,6 +826,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -563,7 +835,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -580,7 +855,35 @@ STAGE PLANS: Map-reduce partition columns: value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -605,6 +908,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -626,6 +942,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.e2 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((VALUE._col0 + VALUE._col0) = 400) or (((VALUE._col0 - 100) = 500) and KEY._col0 is not null)) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, count + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(count, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + 
Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -640,6 +986,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, count + Column Types: string, int + Table: default.e2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_position.q.out b/ql/src/test/results/clientpositive/spark/groupby_position.q.out index 49eb3ed..42e9088 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_position.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_position.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -58,7 +62,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -97,9 +101,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -117,6 +148,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -131,6 +189,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-1 Move Operator tables: @@ -219,6 +291,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -226,11 +300,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -249,7 +325,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -288,9 +364,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -308,6 +411,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.testtable2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -322,6 +452,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.testtable1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: int, string, string + Table: default.testtable2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out index 88fc15b..c77870a 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +199,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, c1, c2 + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -230,6 +273,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2 + Column Types: string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) diff --git a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out index 5851472..bc9fe15 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -24,12 +24,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -197,6 +199,47 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2400 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -230,6 +273,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) @@ -291,12 +342,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -443,7 +496,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -464,6 +517,51 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key, c1, c2, c3, c4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16), compute_stats(VALUE._col5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE + 
File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2432 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -474,7 +572,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"c1":"true","c2":"true","c3":"true","c4":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,c1,c2,c3,c4 @@ -497,6 +595,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, c1, c2, c3, c4 + Column Types: string, int, string, int, int + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) diff --git a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out index 6d087b2..74d4938 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_rollup1.q.out @@ -400,6 +400,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -407,13 +409,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 8 Map Operator Tree: TableScan alias: t1 @@ -434,7 +438,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Map 7 + Map 9 Map Operator Tree: TableScan alias: t1 @@ -490,9 +494,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + 
outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: int) mode: partials @@ -504,7 +535,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 3 Data size: 90 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) - Reducer 5 + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -525,6 +556,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, val + Statistics: Num rows: 1 Data size: 30 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1468 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1472 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -539,6 +597,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t2 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, val + Column Types: string, string, int + Table: default.t3 
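The recurring pattern in every hunk above — a Select Operator feeding compute_stats(...) in hash mode, a single (GROUP, 1) mergepartial reducer, and a trailing Column Stats Work stage per target table — is what column-statistics autogathering adds to each INSERT OVERWRITE plan. A minimal sketch of how one such plan is produced, assuming hive.stats.column.autogather is the governing flag and reusing the t1(key,val) / t2(key1,key2,val) shapes implied by the plan (the verbatim .q test query may differ):

    -- Hypothetical repro, not the verbatim groupby_rollup1.q test:
    SET hive.stats.column.autogather=true;   -- assumed flag name
    SET hive.execution.engine=spark;
    EXPLAIN
    INSERT OVERWRITE TABLE t2
    SELECT key, val, count(1) FROM t1 GROUP BY key, val WITH ROLLUP;

With the flag on, the planner appends the stats-gathering branch after each FileSink (the UDFToInteger cast visible in the plans is inserted automatically to match the target column type), which is why every INSERT plan in this diff gains one extra reducer and one Column Stats Work stage per table written.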
+ Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out index 418f3ec..0d40850 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_1_23.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +178,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -192,6 +241,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -237,12 +294,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -370,6 +429,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + 
Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -403,6 +508,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -442,10 +555,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +595,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -500,6 +616,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern 
was here #### Path -> Partition: @@ -555,6 +687,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -565,7 +727,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -588,6 +750,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY @@ -625,10 +795,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -662,7 +835,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -683,6 +856,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -738,6 +927,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: 
false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -748,7 +967,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -771,6 +990,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY @@ -816,10 +1043,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -874,6 +1104,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -929,6 +1175,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + 
Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -962,6 +1238,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY @@ -1008,12 +1292,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1141,6 +1427,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1174,6 +1506,14 @@ STAGE PLANS: Stats-Aggr 
Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -1214,12 +1554,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1326,7 +1668,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1347,6 +1689,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1357,7 +1745,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1380,6 +1768,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column 
Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -1422,12 +1818,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1544,7 +1942,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1565,17 +1963,63 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1598,6 +2042,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + 
Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key + key, sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -1647,10 +2099,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1684,7 +2139,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1705,6 +2160,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1760,113 +2231,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - 
GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1877,7 +2271,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1900,6 +2294,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -1929,14 +2331,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked 
pattern was here #### 1 1 -1 1 2 1 -2 1 -3 1 3 1 7 1 -7 1 -8 2 8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 @@ -1958,12 +2355,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -1997,7 +2396,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2005,19 +2404,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2073,7 +2488,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2153,7 +2568,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2181,7 +2626,7 @@ 
STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2189,19 +2634,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2212,7 +2673,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2220,13 +2681,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -2235,6 +2696,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -2293,12 +2762,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2385,7 +2856,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2495,7 +2966,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2516,6 +2987,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2526,7 +3043,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2549,6 +3066,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2866,12 +3391,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2909,7 +3436,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 
bucket_field_name key @@ -2932,7 +3459,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -2979,7 +3506,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2995,11 +3522,57 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3010,7 +3583,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3033,6 +3606,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table 
Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY @@ -3070,10 +3651,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3107,7 +3691,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3128,6 +3712,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3137,7 +3737,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3160,7 +3760,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3183,6 +3783,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels 
true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3193,7 +3823,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3216,6 +3846,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -3264,10 +3902,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3322,6 +3963,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3331,7 +3988,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3354,7 +4011,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3377,6 +4034,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + 
outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3410,6 +4097,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY @@ -3457,10 +4152,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3494,7 +4192,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3515,6 +4213,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3524,7 +4238,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3547,7 +4261,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3570,6 +4284,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3580,7 +4324,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3603,6 +4347,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -3657,10 +4409,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3694,7 +4449,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3715,6 +4470,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), 
compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3724,7 +4495,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3747,7 +4518,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3770,6 +4541,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3780,7 +4581,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3803,6 +4604,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -3871,6 +4680,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: 
Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -3878,10 +4689,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -3925,6 +4738,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -3945,6 +4790,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -3959,6 +4845,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -4032,6 +4932,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4039,10 +4941,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 31) + Reducer 2 <- Map 5 (GROUP, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: t2 @@ -4089,6 +4993,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 6 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 8.0) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4109,6 +5048,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4123,6 +5103,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out index f7fca5d..318d25c 100644 --- a/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out +++ b/ql/src/test/results/clientpositive/spark/groupby_sort_skew_1_23.q.out @@ -46,10 +46,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: 
struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -159,6 +178,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -192,6 +241,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T1 GROUP BY key PREHOOK: type: QUERY @@ -237,6 +294,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -244,6 +302,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -389,6 +448,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -422,6 +527,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, string, int + Table: default.outputtbl2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl2 SELECT key, val, count(1) FROM T1 GROUP BY key, val PREHOOK: type: QUERY @@ -461,10 +574,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -498,7 +614,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -519,6 +635,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -574,6 +706,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -584,7 +746,7 @@ STAGE PLANS: 
input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -607,6 +769,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key PREHOOK: type: QUERY @@ -644,10 +814,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -681,7 +854,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -702,6 +875,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -757,6 +946,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -767,7 +986,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -790,6 +1009,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k PREHOOK: type: QUERY @@ -835,10 +1062,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -893,6 +1123,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -948,6 +1194,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -981,6 +1257,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT 1, key, count(1) FROM T1 GROUP BY 1, key PREHOOK: type: QUERY @@ -1027,6 +1311,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 
STAGE PLANS: Stage: Stage-1 @@ -1034,6 +1319,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1179,6 +1465,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1212,6 +1544,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -1252,6 +1592,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1259,6 +1600,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1383,7 +1725,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1404,6 
+1746,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key1, key2, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1414,7 +1802,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,cnt @@ -1437,6 +1825,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, cnt + Column Types: int, int, int + Table: default.outputtbl3 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl3 SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 PREHOOK: type: QUERY @@ -1479,6 +1875,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -1486,6 +1883,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1620,7 +2018,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1641,17 +2039,63 @@ STAGE PLANS: 
TotalFiles: 1 GatherStats: true MultiFileSpray: false - - Stage: Stage-0 - Move Operator - tables: - replace: true + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1674,6 +2118,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT cast(key + key as string), sum(cnt) from (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -1723,10 +2175,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1760,7 +2215,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1781,6 +2236,22 @@ STAGE PLANS: 
TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1836,113 +2307,36 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 - Map Operator Tree: - TableScan - alias: t1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - GatherStats: false - Select Operator - expressions: key (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(1) - keys: _col0 (type: string) - mode: final - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - GlobalTableId: 1 -#### A masked pattern was here #### - NumFilesPerFileSink: 1 - Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE -#### A masked pattern was here #### - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - bucket_count -1 - column.name.delimiter , - columns key,cnt - columns.comments - columns.types int:int -#### A masked pattern was here #### - name default.outputtbl1 - numFiles 2 - numRows 5 - rawDataSize 17 - serialization.ddl struct outputtbl1 { i32 key, i32 cnt} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 22 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Path -> Alias: -#### A masked pattern was here #### - Path -> Partition: -#### A masked pattern was here #### - Partition - base file name: t1 - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string -#### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 -#### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - input format: org.apache.hadoop.mapred.TextInputFormat - output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} - SORTBUCKETCOLSPREFIX TRUE - bucket_count 2 - bucket_field_name key - column.name.delimiter , - columns key,val - columns.comments - columns.types string:string + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 #### A masked pattern was here #### - name default.t1 - numFiles 2 - numRows 6 - rawDataSize 24 - serialization.ddl struct t1 { string key, string val} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 30 + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.t1 - name: default.t1 - Truncated Path -> Alias: - /t1 [t1] + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1953,7 +2347,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -1976,6 +2370,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) FROM T1 GROUP BY key @@ -2005,15 +2407,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@outputtbl1 #### A masked pattern was here #### 1 1 -1 1 -2 1 2 1 3 1 -3 1 -7 1 7 1 8 2 -8 2 PREHOOK: query: EXPLAIN EXTENDED INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( @@ -2034,13 +2431,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -2074,7 +2473,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 
column.name.delimiter , columns key,cnt @@ -2082,19 +2481,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -2150,7 +2565,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 2 + Map 3 Map Operator Tree: TableScan alias: t1 @@ -2230,7 +2645,37 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Reducer 3 + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 4 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2248,7 +2693,7 @@ STAGE PLANS: tag: -1 value expressions: _col1 (type: bigint) auto parallelism: false - Reducer 4 + Reducer 5 Needs Tagging: false Reduce Operator Tree: Group By Operator @@ -2276,7 +2721,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2284,19 +2729,35 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Stage: Stage-0 Move Operator @@ -2307,7 +2768,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2315,13 +2776,13 @@ STAGE PLANS: columns.types int:int #### A masked pattern was here #### name default.outputtbl1 - numFiles 4 - numRows 10 - rawDataSize 30 + numFiles 2 + numRows 5 + rawDataSize 15 serialization.ddl struct outputtbl1 { i32 key, i32 cnt} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 40 + totalSize 20 #### A masked pattern was here #### serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.outputtbl1 @@ -2330,6 +2791,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT * FROM ( SELECT key, count(1) as cnt FROM T1 GROUP BY key @@ -2388,12 +2857,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -2480,7 +2951,7 @@ STAGE PLANS: name: default.t1 Truncated Path -> Alias: /t1 [t1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: t1 @@ -2590,7 +3061,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2611,6 +3082,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -2621,7 +3138,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -2644,6 +3161,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT subq1.key, subq1.cnt+subq2.cnt FROM (SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 @@ -2980,6 +3505,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -2987,6 +3513,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3024,7 +3551,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3047,7 +3574,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3112,7 +3639,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} 
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3128,11 +3655,57 @@ STAGE PLANS: serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 20 #### A masked pattern was here #### - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.outputtbl1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.outputtbl1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3143,7 +3716,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key":"true"}} bucket_count -1 column.name.delimiter , columns key,cnt @@ -3166,6 +3739,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.outputtbl1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl1 SELECT key, count(1) FROM T2 GROUP BY key PREHOOK: type: QUERY @@ -3203,10 +3784,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3240,7 +3824,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3261,6 +3845,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3270,7 +3870,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3293,7 +3893,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3316,6 +3916,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3326,7 +3956,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3349,6 +3979,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, 1, val, count(1) FROM T2 GROUP BY key, 1, val PREHOOK: type: QUERY @@ -3397,10 +4035,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3455,6 +4096,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) + outputColumnNames: key1, key2, key3, key4, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(key4, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2396 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3464,7 +4121,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3487,7 +4144,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3510,6 +4167,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2412 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns 
_col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3543,6 +4230,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, key4, cnt + Column Types: int, int, string, int, int + Table: default.outputtbl5 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl5 SELECT key, 1, val, 2, count(1) FROM T2 GROUP BY key, 1, val, 2 PREHOOK: type: QUERY @@ -3590,10 +4285,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3627,7 +4325,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3648,6 +4346,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3657,7 +4371,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3680,7 +4394,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3703,6 +4417,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3713,7 +4457,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3736,6 +4480,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 SELECT key, constant, val, count(1) from (SELECT key, 1 as constant, val from T2)subq @@ -3790,10 +4542,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -3827,7 +4582,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3848,6 +4603,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int) + outputColumnNames: key1, key2, key3, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(key2, 16), compute_stats(key3, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -3857,7 +4628,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3880,7 +4651,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 2 bucket_field_name key @@ -3903,6 +4674,36 @@ STAGE PLANS: name: default.t2 Truncated Path -> Alias: /t2 [t2] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -3913,7 +4714,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cnt":"true","key1":"true","key2":"true","key3":"true"}} bucket_count -1 column.name.delimiter , columns key1,key2,key3,cnt @@ -3936,6 +4737,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, key2, key3, cnt + Column Types: int, int, string, int + Table: default.outputtbl4 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE outputTbl4 select key, constant3, val, count(1) from ( @@ -4004,6 +4813,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4011,11 +4822,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -4059,6 +4872,38 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 7 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: key, val + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: key (type: string), val (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4093,6 +4938,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4107,6 +4993,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -4180,6 +5080,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4187,11 +5089,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 31) Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 7 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -4238,6 +5142,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Map 7 + Map Operator Tree: + TableScan + alias: t2 + Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 8.0) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), val (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string), _col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, val, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -4272,6 +5211,47 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE 
+ Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -4286,6 +5266,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val, cnt + Column Types: int, string, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out index 1efb81b..35b6891 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_bucketed_table.q.out @@ -42,7 +42,7 @@ Database: default Table: test_table_bucketed #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 3 numRows 309 rawDataSize 1482 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out index 356f625..7e5a92b 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_map_operators.q.out @@ -56,10 +56,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -89,6 +92,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 250 Data size: 2656 Basic 
stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -105,6 +143,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT key, count(*) FROM test_table1 GROUP BY key PREHOOK: type: QUERY @@ -139,7 +184,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 1482 @@ -177,12 +222,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -209,7 +256,7 @@ STAGE PLANS: Map-reduce partition columns: UDFToDouble(_col1) (type: double) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -248,6 +295,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, part + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -264,6 +346,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT a.key, a.value FROM ( SELECT key, cast(count(*) AS STRING) AS value FROM test_table1 GROUP BY key @@ -337,10 +426,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -371,6 +463,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -387,6 +514,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key PREHOOK: type: QUERY @@ -423,7 +557,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 @@ -452,12 +586,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -508,6 +644,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table_out + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 137 Data size: 1455 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -524,6 +695,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_table_out + PREHOOK: query: INSERT OVERWRITE TABLE test_table_out PARTITION (part = '1') SELECT /*+ MAPJOIN(a) */ b.value, count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key GROUP BY b.value @@ -562,7 +740,7 @@ Database: default Table: test_table_out #### A masked pattern was here #### Partition Parameters: - 
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 309 rawDataSize 2728 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out index 175ddd6..973c8d0 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_merge.q.out @@ -40,7 +40,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 @@ -91,7 +91,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 1028 rawDataSize 10968 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out index 13219ac..fb0f6b9 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out @@ -26,12 +26,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -62,6 +64,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-04-08' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), _col1 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-04-08' (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-04-08' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -79,6 +116,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table + PREHOOK: query: INSERT OVERWRITE TABLE test_table PARTITION (ds = '2008-04-08', hr) SELECT key2, value, cast(hr as int) FROM (SELECT if ((key % 3) < 2, 0, 1) as key2, value, (key % 3 % 2) as hr @@ -129,7 +173,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 670 rawDataSize 5888 @@ -169,7 +213,7 @@ Database: default Table: test_table #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 330 rawDataSize 2924 diff --git a/ql/src/test/results/clientpositive/spark/innerjoin.q.out b/ql/src/test/results/clientpositive/spark/innerjoin.q.out index 2f49406..fde8047 100644 --- a/ql/src/test/results/clientpositive/spark/innerjoin.q.out +++ b/ql/src/test/results/clientpositive/spark/innerjoin.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 INNER JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/input12.q.out b/ql/src/test/results/clientpositive/spark/input12.q.out index 2efd81b..ed1e45c 100644 --- a/ql/src/test/results/clientpositive/spark/input12.q.out +++ b/ql/src/test/results/clientpositive/spark/input12.q.out @@ -38,6 +38,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -46,9 +49,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-3 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -68,6 +75,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -98,6 +118,106 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + 
Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key >= 200) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -112,6 +232,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + 
Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input13.q.out b/ql/src/test/results/clientpositive/spark/input13.q.out index 09c7959..e25139c 100644 --- a/ql/src/test/results/clientpositive/spark/input13.q.out +++ b/ql/src/test/results/clientpositive/spark/input13.q.out @@ -40,6 +40,9 @@ STAGE DEPENDENCIES: Stage-4 is a root stage Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -49,9 +52,13 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-4 Spark + Edges: + Reducer 2 <- Map 5 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) #### A masked pattern was here #### Vertices: - Map 1 + Map 5 Map Operator Tree: TableScan alias: src @@ -71,6 +78,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key >= 100) and (key < 200)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -115,6 +135,106 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Map 6 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 100) and (key < 200)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column 
stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 7 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key >= 200) and (key < 300)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: key + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2008-04-08' (type: string), '12' (type: string) + sort order: ++ + Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: '2008-04-08' (type: string), '12' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -129,6 +249,27 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: 
default.dest1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.dest3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input14.q.out b/ql/src/test/results/clientpositive/spark/input14.q.out index 36f162e..99fcce4 100644 --- a/ql/src/test/results/clientpositive/spark/input14.q.out +++ b/ql/src/test/results/clientpositive/spark/input14.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +77,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value) diff --git a/ql/src/test/results/clientpositive/spark/input17.q.out b/ql/src/test/results/clientpositive/spark/input17.q.out index d95dbcb..44fff1c 100644 --- a/ql/src/test/results/clientpositive/spark/input17.q.out +++ b/ql/src/test/results/clientpositive/spark/input17.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 11 Data size: 3070 
Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -86,6 +115,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src_thrift SELECT TRANSFORM(src_thrift.aint + src_thrift.lint[0], src_thrift.lintstring[0]) diff --git a/ql/src/test/results/clientpositive/spark/input18.q.out b/ql/src/test/results/clientpositive/spark/input18.q.out index 65850b2..13986cb 100644 --- a/ql/src/test/results/clientpositive/spark/input18.q.out +++ b/ql/src/test/results/clientpositive/spark/input18.q.out @@ -28,12 +28,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -75,6 +77,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -89,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, 
value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM src SELECT TRANSFORM(src.key, src.value, 1+2, 3+4) diff --git a/ql/src/test/results/clientpositive/spark/input1_limit.q.out b/ql/src/test/results/clientpositive/spark/input1_limit.q.out index dd49287..5e3ad71 100644 --- a/ql/src/test/results/clientpositive/spark/input1_limit.q.out +++ b/ql/src/test/results/clientpositive/spark/input1_limit.q.out @@ -28,6 +28,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -35,11 +37,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 5 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -57,9 +61,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) - Map 5 + Map 7 Map Operator Tree: TableScan alias: src @@ -77,7 +80,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -100,8 +102,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 @@ -121,6 +150,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 50 Basic stats: COMPLETE Column stats: NONE + Group 
By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -135,6 +191,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/input_part2.q.out b/ql/src/test/results/clientpositive/spark/input_part2.q.out index 6387ed1..a48fa09 100644 --- a/ql/src/test/results/clientpositive/spark/input_part2.q.out +++ b/ql/src/test/results/clientpositive/spark/input_part2.q.out @@ -28,15 +28,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -82,6 +87,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-08' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false Filter Operator isSamplingPred: false predicate: ((key < 100) and (ds = '2008-04-09')) (type: boolean) @@ -224,6 +245,198 @@ STAGE PLANS: Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [srcpart] /srcpart/ds=2008-04-09/hr=12 [srcpart] + Map 5 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key 
< 100) and (ds = '2008-04-09')) (type: boolean) + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(key) (type: int), value (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '12' (type: string), '2008-04-09' (type: string) + outputColumnNames: key, value, hr, ds + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(hr, 16), compute_stats(ds, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 12 + properties: + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + 
partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + column.name.delimiter , + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=12 [srcpart] + /srcpart/ds=2008-04-09/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -257,6 +470,22 @@ STAGE PLANS: 
Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest1 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value, hr, ds + Column Types: int, string, string, string + Table: default.dest2 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/insert_into1.q.out b/ql/src/test/results/clientpositive/spark/insert_into1.q.out index dff389d..b05e218 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into1.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into1.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -39,7 +41,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -62,6 +63,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -76,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * from src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -132,12 +167,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -153,7 +190,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -176,6 +212,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -190,6 +253,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT INTO TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -246,12 +316,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -267,7 +339,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -290,6 +361,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: 
NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -304,6 +402,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into1 SELECT * FROM src ORDER BY key LIMIT 10 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -360,10 +465,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -384,6 +492,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 1 (type: int), 'a' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -398,6 +533,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert overwrite table insert_into1 select 1, 'a' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table @@ -416,10 +558,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -440,6 +585,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into1 + Select Operator + expressions: 2 (type: int), 'b' (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 89 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -454,6 +626,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into1 + PREHOOK: query: insert into insert_into1 select 2, 'b' PREHOOK: type: QUERY PREHOOK: Input: _dummy_database@_dummy_table diff --git a/ql/src/test/results/clientpositive/spark/insert_into2.q.out b/ql/src/test/results/clientpositive/spark/insert_into2.q.out index 329387d..f211ae4 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into2.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into2.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -66,6 +67,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + 
Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -82,6 +118,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT INTO TABLE insert_into2 PARTITION (ds='1') SELECT * FROM src order by key limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -177,12 +220,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -198,7 +243,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -221,6 +265,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -237,6 +316,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key 
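For the partitioned insert_into2 target the stats branch differs in one detail: the hash-side Group By keys on the partition column ds, and the mergepartial reducer groups on KEY._col0, so one stats row is produced per partition rather than a single table-level row. A sketch under the same autogather assumption (src is the standard 500-row test fixture):

SET hive.stats.column.autogather=true;
CREATE TABLE insert_into2 (key INT, value STRING) PARTITIONED BY (ds STRING);
-- the stats Group By Operator should now carry "keys: ds (type: string)":
EXPLAIN INSERT INTO TABLE insert_into2 PARTITION (ds='1')
SELECT * FROM src ORDER BY key LIMIT 100;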
LIMIT 100 PREHOOK: type: QUERY @@ -301,12 +387,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -322,7 +410,6 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -345,6 +432,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 250 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -361,6 +483,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into2 + PREHOOK: query: INSERT OVERWRITE TABLE insert_into2 PARTITION (ds='2') SELECT * FROM src order by key LIMIT 50 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/insert_into3.q.out b/ql/src/test/results/clientpositive/spark/insert_into3.q.out index a6fac23..67039ec 100644 --- a/ql/src/test/results/clientpositive/spark/insert_into3.q.out +++ b/ql/src/test/results/clientpositive/spark/insert_into3.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,11 +41,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (SORT, 1) - Reducer 3 <- Map 5 (SORT, 1) + Reducer 2 <- Map 6 (SORT, 1) + Reducer 3 
<- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (SORT, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -56,21 +60,6 @@ STAGE PLANS: key expressions: _col0 (type: string), _col1 (type: string) sort order: ++ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 - Map 5 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 Reducer 2 Reduce Operator Tree: Select Operator @@ -92,8 +81,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 50 Data size: 500 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string) outputColumnNames: _col0, _col1 @@ -113,6 +129,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 
972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -127,6 +170,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-1 Move Operator tables: @@ -192,6 +249,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -199,11 +258,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 1) + Reducer 2 <- Map 6 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 6 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: src @@ -218,7 +279,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string), _col1 (type: string) Reducer 2 Reduce Operator Tree: @@ -241,8 +301,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3a + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string), VALUE._col1 (type: string) outputColumnNames: _col0, _col1 @@ -262,6 +349,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.insert_into3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, 
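In the multi-insert insert_into3 case each target table gets its own stats branch — compute_stats feeding Reducer 3 for insert_into3a and Reducer 5 for insert_into3b — plus a separate Column Stats Work stage per table (Stage-5 and Stage-6). A sketch consistent with the first of the two plans, with the LIMIT values read off the reducer row counts:

SET hive.stats.column.autogather=true;
CREATE TABLE insert_into3a (key INT, value STRING);
CREATE TABLE insert_into3b (key INT, value STRING);
EXPLAIN
FROM src
INSERT OVERWRITE TABLE insert_into3a SELECT * ORDER BY key, value LIMIT 50
INSERT OVERWRITE TABLE insert_into3b SELECT * ORDER BY key, value LIMIT 100;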
value + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -276,6 +390,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3a + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.insert_into3b + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/join1.q.out b/ql/src/test/results/clientpositive/spark/join1.q.out index a0ee4ea..6f67f69 100644 --- a/ql/src/test/results/clientpositive/spark/join1.q.out +++ b/ql/src/test/results/clientpositive/spark/join1.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest_j1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join14.q.out b/ql/src/test/results/clientpositive/spark/join14.q.out index e804a1d..7fda67a 100644 --- a/ql/src/test/results/clientpositive/spark/join14.q.out +++ b/ql/src/test/results/clientpositive/spark/join14.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -83,6 +85,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: c1, c2 + Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -97,6 +126,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2 + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100 INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join17.q.out b/ql/src/test/results/clientpositive/spark/join17.q.out index 
c718916..e6f5e9b 100644 --- a/ql/src/test/results/clientpositive/spark/join17.q.out +++ b/ql/src/test/results/clientpositive/spark/join17.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -100,7 +102,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -221,6 +223,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: key1, value1, key2, value2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -254,6 +302,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key1, value1, key2, value2 + Column Types: int, string, int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.* PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join25.q.out b/ql/src/test/results/clientpositive/spark/join25.q.out index 05e5e70..9928fa2 100644 --- a/ql/src/test/results/clientpositive/spark/join25.q.out +++ b/ql/src/test/results/clientpositive/spark/join25.q.out @@ -21,6 
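compute_stats itself is a regular Hive UDAF, which is why the intermediate sink in the verbose join17 plan serializes plain struct columns (columns.types struct:struct:struct:struct) through LazySimpleSerDe before the metastore write. Its second argument — 16 in all of these plans — is the number of bit-vectors used by the distinct-value estimator. Assuming the src fixture, the struct it produces can be inspected directly:

-- one struct per column: column type, min/max (or lengths), null count, ndv estimate, ...
SELECT compute_stats(key, 16), compute_stats(value, 16) FROM src;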
+21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join26.q.out b/ql/src/test/results/clientpositive/spark/join26.q.out index b1de987..51d7b31 100644 --- a/ql/src/test/results/clientpositive/spark/join26.q.out +++ b/ql/src/test/results/clientpositive/spark/join26.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -102,7 +103,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: z @@ -180,6 +181,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -206,8 +209,8 @@ STAGE PLANS: 2 _col0 (type: string) outputColumnNames: _col1, _col2, _col4 input vertices: - 1 Map 2 - 2 Map 3 + 1 Map 3 + 2 Map 4 Position of Big Table: 0 Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -246,6 +249,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), 
_col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -299,6 +318,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -332,6 +381,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join27.q.out b/ql/src/test/results/clientpositive/spark/join27.q.out index e10d2fb..385a64d 100644 --- a/ql/src/test/results/clientpositive/spark/join27.q.out +++ b/ql/src/test/results/clientpositive/spark/join27.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, 
_col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.value = y.value) diff --git a/ql/src/test/results/clientpositive/spark/join28.q.out b/ql/src/test/results/clientpositive/spark/join28.q.out index 8d4d870..189760a 100644 --- a/ql/src/test/results/clientpositive/spark/join28.q.out +++ b/ql/src/test/results/clientpositive/spark/join28.q.out @@ -27,6 +27,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -51,7 +52,7 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -72,6 +73,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -94,7 +97,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string) @@ -122,8 +125,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: 
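The map-join outputs (join25, join27, join28) show the same addition from the other direction: the original INSERT compiled to a map-only Spark job, so the new "Reducer 3 <- Map 2 (GROUP, 1)" edge exists solely to run the mergepartial half of compute_stats. The manual equivalent, which gathers the same statistics without changing the insert plan at all, is:

-- manual alternative to the autogathered stats branch:
ANALYZE TABLE dest_j1 COMPUTE STATISTICS FOR COLUMNS;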
COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -138,6 +168,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq.key1, z.value FROM diff --git a/ql/src/test/results/clientpositive/spark/join29.q.out b/ql/src/test/results/clientpositive/spark/join29.q.out index 573628f..62ba549 100644 --- a/ql/src/test/results/clientpositive/spark/join29.q.out +++ b/ql/src/test/results/clientpositive/spark/join29.q.out @@ -23,6 +23,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -74,6 +75,7 @@ STAGE PLANS: Spark Edges: Reducer 4 <- Map 3 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -132,6 +134,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, cnt1, cnt2 + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt1, 16), compute_stats(cnt2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -146,6 +175,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt1, cnt2 + Column Types: string, int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, subq1.cnt, subq2.cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/spark/join3.q.out b/ql/src/test/results/clientpositive/spark/join3.q.out index e50f091..056aa0f 100644 --- a/ql/src/test/results/clientpositive/spark/join3.q.out +++ b/ql/src/test/results/clientpositive/spark/join3.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: 
Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -43,7 +45,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -60,7 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -102,6 +104,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -116,6 +145,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join30.q.out b/ql/src/test/results/clientpositive/spark/join30.q.out index 3584f8c..f673037 100644 --- a/ql/src/test/results/clientpositive/spark/join30.q.out +++ b/ql/src/test/results/clientpositive/spark/join30.q.out @@ -19,6 +19,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,7 @@ STAGE PLANS: Spark Edges: Reducer 3 <- Map 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 
275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join31.q.out b/ql/src/test/results/clientpositive/spark/join31.q.out index 3fee7b8..363a624 100644 --- a/ql/src/test/results/clientpositive/spark/join31.q.out +++ b/ql/src/test/results/clientpositive/spark/join31.q.out @@ -25,13 +25,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -60,6 +61,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -97,7 +99,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) @@ -131,6 +133,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -145,6 +174,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT subq1.key, count(1) as cnt FROM (select x.key, count(1) as cnt from src1 x group by x.key) subq1 JOIN diff --git a/ql/src/test/results/clientpositive/spark/join32.q.out b/ql/src/test/results/clientpositive/spark/join32.q.out index c93411e..25aaf26 100644 --- a/ql/src/test/results/clientpositive/spark/join32.q.out +++ b/ql/src/test/results/clientpositive/spark/join32.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -101,7 +102,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -176,6 +177,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -200,7 +203,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -211,7 +214,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -250,6 +253,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -305,6 +324,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE 
Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -338,6 +387,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out index e658e1a..854f84d 100644 --- a/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out +++ b/ql/src/test/results/clientpositive/spark/join32_lessSize.q.out @@ -31,13 +31,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -109,7 +110,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -184,6 +185,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -208,7 +211,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -219,7 +222,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -258,6 +261,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -313,6 +332,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -346,6 +395,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -482,6 +539,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-4 @@ -648,7 +706,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: y @@ -723,6 +781,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -758,7 +818,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 4 + 1 Map 5 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -776,7 +836,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -797,6 +857,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -850,6 +926,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [w] + Reducer 4 + Needs Tagging: false + 
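Once the gathered stats reach the metastore, COLUMN_STATS_ACCURATE is widened from {"BASIC_STATS":"true"} to include a per-column COLUMN_STATS map — the table-properties change visible in the join32_lessSize hunks above. The stored values can be verified afterwards:

DESCRIBE FORMATTED dest_j1;        -- Table Parameters include the widened COLUMN_STATS_ACCURATE
DESCRIBE FORMATTED dest_j1 key;    -- per-column min, max, num_nulls, distinct_count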
Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -860,7 +966,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -883,6 +989,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src w JOIN src1 x ON (x.value = w.value) @@ -1014,6 +1128,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1094,7 +1209,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1169,6 +1284,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1193,7 +1310,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -1247,6 +1364,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ 
-1300,6 +1433,36 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [y] + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1333,6 +1496,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -1466,6 +1637,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1617,6 +1789,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -1674,7 +1848,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1695,6 +1869,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -1748,6 +1938,36 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), 
compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -1758,7 +1978,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -1781,6 +2001,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, z.value, res.value FROM (select x.key, x.value from src1 x LEFT OUTER JOIN src y ON (x.key = y.key)) res @@ -1926,6 +2154,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -1950,7 +2179,7 @@ STAGE PLANS: 1 _col1 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -1971,6 +2200,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -1993,7 +2224,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2021,8 +2252,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator 
Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2037,6 +2295,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, x.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res @@ -2170,6 +2435,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -2194,7 +2460,7 @@ STAGE PLANS: 1 _col1 (type: string) Local Work: Map Reduce Local Work - Map 3 + Map 4 Map Operator Tree: TableScan alias: x @@ -2215,6 +2481,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -2237,7 +2505,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col1, _col2 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: string), _col2 (type: string) @@ -2265,8 +2533,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -2281,6 +2576,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, 
string + Table: default.dest_j2 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j2 SELECT res.key, y.value, res.value FROM (select x.key, x.value from src1 x JOIN src y ON (x.key = y.key)) res diff --git a/ql/src/test/results/clientpositive/spark/join33.q.out b/ql/src/test/results/clientpositive/spark/join33.q.out index c93411e..25aaf26 100644 --- a/ql/src/test/results/clientpositive/spark/join33.q.out +++ b/ql/src/test/results/clientpositive/spark/join33.q.out @@ -23,13 +23,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: x @@ -101,7 +102,7 @@ STAGE PLANS: name: default.src1 Truncated Path -> Alias: /src1 [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: y @@ -176,6 +177,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -200,7 +203,7 @@ STAGE PLANS: 1 _col1 (type: string) outputColumnNames: _col0, _col3 input vertices: - 1 Map 2 + 1 Map 3 Position of Big Table: 0 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE Map Join Operator @@ -211,7 +214,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col3, _col6 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -250,6 +253,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -305,6 +324,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe 
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -338,6 +387,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join34.q.out b/ql/src/test/results/clientpositive/spark/join34.q.out index 86ddd1d..f0a3379 100644 --- a/ql/src/test/results/clientpositive/spark/join34.q.out +++ b/ql/src/test/results/clientpositive/spark/join34.q.out @@ -30,12 +30,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -112,7 +114,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x] - Map 3 + Map 4 Map Operator Tree: TableScan alias: x1 @@ -186,7 +188,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x @@ -307,6 +309,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 365 Data size: 3878 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: 
false Stage: Stage-0 Move Operator @@ -340,6 +388,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.value FROM diff --git a/ql/src/test/results/clientpositive/spark/join35.q.out b/ql/src/test/results/clientpositive/spark/join35.q.out index 74b1ee4..8f49be2 100644 --- a/ql/src/test/results/clientpositive/spark/join35.q.out +++ b/ql/src/test/results/clientpositive/spark/join35.q.out @@ -30,14 +30,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 6 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 5 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 6 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 6 <- Map 5 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -120,7 +122,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x] - Map 4 + Map 5 Map Operator Tree: TableScan alias: x1 @@ -200,7 +202,7 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [x1] - Map 6 + Map 7 Map Operator Tree: TableScan alias: x @@ -339,7 +341,53 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 182 Data size: 1938 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 6 Needs 
Tagging: false Reduce Operator Tree: Group By Operator @@ -390,6 +438,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, int + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT x.key, x.value, subq1.cnt FROM diff --git a/ql/src/test/results/clientpositive/spark/join36.q.out b/ql/src/test/results/clientpositive/spark/join36.q.out index b1717e0..7fd1810 100644 --- a/ql/src/test/results/clientpositive/spark/join36.q.out +++ b/ql/src/test/results/clientpositive/spark/join36.q.out @@ -61,13 +61,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: y @@ -88,6 +89,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -110,7 +113,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: int), _col3 (type: int) @@ -124,8 +127,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: key, value, val2 + Statistics: Num rows: 339 Data size: 1630 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -140,6 +170,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, int, int + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x) */ x.key, x.cnt, y.cnt FROM tmp1 x JOIN tmp2 y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join37.q.out b/ql/src/test/results/clientpositive/spark/join37.q.out index 327e93e..535a14d 100644 --- a/ql/src/test/results/clientpositive/spark/join37.q.out +++ 
b/ql/src/test/results/clientpositive/spark/join37.q.out @@ -21,6 +21,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 3 <- Map 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 2 @@ -84,8 +87,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Local Work: Map Reduce Local Work + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -100,6 +130,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: int, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(X) */ x.key, x.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join39.q.out b/ql/src/test/results/clientpositive/spark/join39.q.out index 183b14a..f94ddf6 100644 --- a/ql/src/test/results/clientpositive/spark/join39.q.out +++ b/ql/src/test/results/clientpositive/spark/join39.q.out @@ -21,13 +21,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 2 + Map 3 Map Operator Tree: TableScan alias: src @@ -48,6 +49,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -67,7 +70,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 2 + 1 Map 3 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -77,8 +80,35 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest_j1 + Select Operator + expressions: _col0 (type: 
string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, key1, val2 + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(key1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Local Work: Map Reduce Local Work + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1968 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -93,6 +123,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, key1, val2 + Column Types: string, string, string, string + Table: default.dest_j1 + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/join4.q.out b/ql/src/test/results/clientpositive/spark/join4.q.out index 08b1f58..36e9767 100644 --- a/ql/src/test/results/clientpositive/spark/join4.q.out +++ b/ql/src/test/results/clientpositive/spark/join4.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join5.q.out b/ql/src/test/results/clientpositive/spark/join5.q.out index 6d7723d..cf7ce14 100644 --- a/ql/src/test/results/clientpositive/spark/join5.q.out +++ b/ql/src/test/results/clientpositive/spark/join5.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 6 Data size: 63 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input 
format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join6.q.out b/ql/src/test/results/clientpositive/spark/join6.q.out index 8ae5e3a..89da894 100644 --- a/ql/src/test/results/clientpositive/spark/join6.q.out +++ b/ql/src/test/results/clientpositive/spark/join6.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -106,6 +108,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -120,6 +149,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join7.q.out b/ql/src/test/results/clientpositive/spark/join7.q.out index d01817c..bd190c0 100644 --- a/ql/src/test/results/clientpositive/spark/join7.q.out +++ 
b/ql/src/test/results/clientpositive/spark/join7.q.out @@ -50,12 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -76,7 +78,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -94,7 +96,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src3 @@ -136,6 +138,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6 + Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -150,6 +179,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6 + Column Types: int, string, int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join8.q.out b/ql/src/test/results/clientpositive/spark/join8.q.out index 1819808..764f235 100644 --- a/ql/src/test/results/clientpositive/spark/join8.q.out +++ b/ql/src/test/results/clientpositive/spark/join8.q.out @@ -40,12 +40,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage 
Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +68,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -109,6 +111,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string) + outputColumnNames: c1, c2, c3, c4 + Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -123,6 +152,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4 + Column Types: int, string, int, string + Table: default.dest1 + PREHOOK: query: FROM ( FROM ( diff --git a/ql/src/test/results/clientpositive/spark/join9.q.out b/ql/src/test/results/clientpositive/spark/join9.q.out index 9b5b243..53d03e2 100644 --- a/ql/src/test/results/clientpositive/spark/join9.q.out +++ b/ql/src/test/results/clientpositive/spark/join9.q.out @@ -18,12 +18,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -101,7 +103,7 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=12 [src1] - Map 3 + Map 4 Map Operator Tree: TableScan alias: src2 @@ -222,6 +224,52 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 
(type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -255,6 +303,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.dest1 + Is Table Level Stats: true + PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out index 67b2e94..d852b0f 100644 --- a/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out +++ b/ql/src/test/results/clientpositive/spark/join_map_ppr.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -172,6 +173,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -234,6 +237,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: 
struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -289,6 +308,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -322,6 +371,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1 x JOIN src y ON (x.key = y.key) @@ -517,6 +574,7 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 @@ -550,7 +608,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -571,7 +629,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -619,7 +677,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -640,7 +698,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -664,6 +722,8 @@ STAGE PLANS: Stage: Stage-1 Spark + 
Edges: + Reducer 4 <- Map 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 3 @@ -705,7 +765,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -726,6 +786,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, val2 + Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -781,6 +857,36 @@ STAGE PLANS: name: default.srcpart Truncated Path -> Alias: /srcpart/ds=2008-04-08/hr=11 [z] + Reducer 4 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -791,7 +897,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","val2":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value,val2 @@ -814,6 +920,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value, val2 + Column Types: string, string, string + Table: default.dest_j1 + Is Table Level Stats: true + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value FROM src1_copy x JOIN src_copy y ON (x.key = y.key) diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out 
index cdd5c66..be0c4c2 100644
--- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_10.q.out
@@ -26,10 +26,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -72,6 +75,25 @@ STAGE PLANS:
                       TotalFiles: 1
                       GatherStats: true
                       MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: false
             Path -> Alias:
#### A masked pattern was here ####
             Path -> Partition:
@@ -123,6 +145,41 @@ STAGE PLANS:
                     name: default.src
       Truncated Path -> Alias:
         /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -156,6 +213,14 @@ STAGE PLANS:
     Stats-Aggr Operator
#### A masked pattern was here ####
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.list_bucketing_static_part
+          Is Table Level Stats: false
+
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
 select key, value from src
 PREHOOK: type: QUERY
@@ -198,7 +263,7 @@ Database:            default
 Table:               list_bucketing_static_part
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	4
 	numRows             	500
 	rawDataSize         	4812
diff --git a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
index 08df5d3..bc2e336 100644
--- a/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/list_bucket_dml_2.q.out
@@ -26,10 +26,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -72,6 +75,25 @@ STAGE PLANS:
                       TotalFiles: 1
                       GatherStats: true
                       MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: false
             Path -> Alias:
#### A masked pattern was here ####
             Path -> Partition:
@@ -174,6 +196,41 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /srcpart/ds=2008-04-08/hr=11 [srcpart]
         /srcpart/ds=2008-04-08/hr=12 [srcpart]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -207,6 +264,14 @@ STAGE PLANS:
     Stats-Aggr Operator
#### A masked pattern was here ####
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.list_bucketing_static_part
+          Is Table Level Stats: false
+
 PREHOOK: query: insert overwrite table list_bucketing_static_part partition (ds = '2008-04-08', hr = '11')
 select key, value from srcpart where ds = '2008-04-08'
 PREHOOK: type: QUERY
@@ -253,7 +318,7 @@ Database:            default
 Table:               list_bucketing_static_part
#### A masked pattern was here ####
 Partition Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	numFiles            	6
 	numRows             	1000
 	rawDataSize         	9624
@@ -314,7 +379,7 @@ STAGE PLANS:
                 ds 2008-04-08
                 hr 11
               properties:
-                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                 bucket_count -1
                 column.name.delimiter ,
                 columns key,value
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
index 5bf7f28..20eaf35 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part1.q.out
@@ -56,15 +56,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 2)
+        Reducer 3 <- Map 5 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
             Map Operator Tree:
                 TableScan
                   alias: srcpart
@@ -84,6 +89,22 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part1
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
                   Filter Operator
                     predicate: (ds > '2008-04-08') (type: boolean)
                     Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
@@ -99,6 +120,72 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part2
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (ds > '2008-04-08') (type: boolean)
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string), hr (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        outputColumnNames: key, value, hr
+                        Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          keys: '2008-12-31' (type: string), hr (type: string)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2, _col3
+                          Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: '2008-12-31' (type: string), _col1 (type: string)
+                            sort order: ++
+                            Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
+                            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: '2008-12-31' (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -116,6 +203,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part2
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
index 5ec6d68..4a46824 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part10.q.out
@@ -46,10 +46,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -69,6 +72,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part10
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2008-12-31' (type: string), _col2 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -86,6 +124,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part10
+
 PREHOOK: query: from srcpart
 insert overwrite table nzhang_part10 partition(ds='2008-12-31', hr) select key, value, hr where ds > '2008-04-08'
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
index 7f403d5..9f37175 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part13.q.out
@@ -60,10 +60,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2), Map 3 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -86,7 +89,23 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part13
-        Map 2 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Map 3 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -106,6 +125,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part13
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2010-03-03' (type: string), _col2 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -123,6 +177,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part13
+
 PREHOOK: query: insert overwrite table nzhang_part13 partition (ds="2010-03-03", hr)
 select * from (
   select key, value, '22'
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
index ce65e71..5315b1d 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part14.q.out
@@ -47,14 +47,16 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 1)
-        Reducer 4 <- Map 1 (GROUP, 1)
-        Reducer 6 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 2), Reducer 5 (GROUP, 2), Reducer 7 (GROUP, 2)
+        Reducer 5 <- Map 1 (GROUP, 1)
+        Reducer 7 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -70,7 +72,6 @@ STAGE PLANS:
                       Reduce Output Operator
                         sort order: 
                         Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
-                        TopN Hash Memory Usage: 0.1
         Reducer 2 
             Reduce Operator Tree:
               Limit
@@ -88,7 +89,42 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part14
-        Reducer 4 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16)
+                      keys: value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 2 Data size: 1130 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Limit
                 Number of rows: 2
@@ -105,7 +141,23 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part14
-        Reducer 6 
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16)
+                      keys: value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: struct)
+        Reducer 7 
             Reduce Operator Tree:
               Limit
                 Number of rows: 2
@@ -122,6 +174,22 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part14
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 6 Data size: 941 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16)
+                      keys: value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 2 Data size: 1154 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -138,6 +206,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.nzhang_part14
+
 PREHOOK: query: insert overwrite table nzhang_part14 partition(value)
 select key, value from (
   select * from (select 'k1' as key, cast(null as string) as value from src limit 2)a
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
index d066b3a..724feee 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part2.q.out
@@ -38,12 +38,14 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 10)
+        Reducer 3 <- Reducer 2 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -74,6 +76,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part_bucket
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), '2010-03-23' (type: string), _col2 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -91,6 +128,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part_bucket
+
 PREHOOK: query: insert overwrite table nzhang_part_bucket partition (ds='2010-03-23', hr)
 select key, value, hr from srcpart where ds is not null and hr is not null
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
index d120963..67b70ed 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part3.q.out
@@ -44,10 +44,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -67,6 +70,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part3
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -84,6 +122,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part3
+
 PREHOOK: query: insert overwrite table nzhang_part3 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
index 7ec76b5..03a2b1d 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part4.q.out
@@ -54,10 +54,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -77,6 +80,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part4
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -94,6 +132,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part4
+
 PREHOOK: query: insert overwrite table nzhang_part4 partition (ds, hr) select key, value, ds, hr from srcpart where ds is not null and hr is not null
 PREHOOK: type: QUERY
 PREHOOK: Input: default@srcpart
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
index b132a59..e8713a2 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part5.q.out
@@ -31,10 +31,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -54,6 +57,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part5
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16)
+                      keys: value (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -70,6 +108,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key
+          Column Types: string
+          Table: default.nzhang_part5
+
 PREHOOK: query: insert overwrite table nzhang_part5 partition (value) select key, value from src
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
index e19a986..be9eb0c 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part8.q.out
@@ -48,15 +48,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 2)
+        Reducer 3 <- Map 5 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: srcpart
@@ -99,6 +104,25 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        null sort order: aa
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                        tag: -1
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+                        auto parallelism: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (ds > '2008-04-08') (type: boolean)
@@ -337,6 +361,309 @@ STAGE PLANS:
         /srcpart/ds=2008-04-08/hr=12 [srcpart]
         /srcpart/ds=2008-04-09/hr=11 [srcpart]
         /srcpart/ds=2008-04-09/hr=12 [srcpart]
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart
+                  Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (ds > '2008-04-08') (type: boolean)
+                    Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string), hr (type: string)
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+                        outputColumnNames: key, value, hr
+                        Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          keys: '2008-12-31' (type: string), hr (type: string)
+                          mode: hash
+                          outputColumnNames: _col0, _col1, _col2, _col3
+                          Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            key expressions: '2008-12-31' (type: string), _col1 (type: string)
+                            null sort order: aa
+                            sort order: ++
+                            Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string)
+                            Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE
+                            tag: -1
+                            value expressions: _col2 (type: struct), _col3 (type: struct)
+                            auto parallelism: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-08
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=11
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 11
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+#### A masked pattern was here ####
+                Partition
+                  base file name: hr=12
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2008-04-09
+                    hr 12
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments 'default','default'
+                    columns.types string:string
+#### A masked pattern was here ####
+                    name default.srcpart
+                    numFiles 1
+                    numRows 500
+                    partition_columns ds/hr
+                    partition_columns.types string:string
+                    rawDataSize 5312
+                    serialization.ddl struct srcpart { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 5812
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments 'default','default'
+                      columns.types string:string
+#### A masked pattern was here ####
+                      name default.srcpart
+                      partition_columns ds/hr
+                      partition_columns.types string:string
+                      serialization.ddl struct srcpart { string key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    name: default.srcpart
+                  name: default.srcpart
+      Truncated Path -> Alias:
+        /srcpart/ds=2008-04-08/hr=11 [srcpart]
+        /srcpart/ds=2008-04-08/hr=12 [srcpart]
+        /srcpart/ds=2008-04-09/hr=11 [srcpart]
+        /srcpart/ds=2008-04-09/hr=12 [srcpart]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
+        Reducer 3 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: '2008-12-31' (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3
+                          columns.types struct:struct:string:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -370,6 +697,22 @@ STAGE PLANS:
     Stats-Aggr Operator
#### A masked pattern was here ####
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part8
+          Is Table Level Stats: false
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part8
+          Is Table Level Stats: false
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
index 55bcfae..5cf3034 100644
--- a/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
+++ b/ql/src/test/results/clientpositive/spark/load_dyn_part9.q.out
@@ -46,10 +46,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -69,6 +72,41 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.nzhang_part9
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+                    outputColumnNames: key, value, ds, hr
+                    Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      keys: ds (type: string), hr (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                        Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -86,6 +124,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.nzhang_part9
+
 PREHOOK: query: from srcpart
 insert overwrite table nzhang_part9 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08'
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/mapreduce1.q.out b/ql/src/test/results/clientpositive/spark/mapreduce1.q.out
index d75b482..eafc1e5 100644
--- a/ql/src/test/results/clientpositive/spark/mapreduce1.q.out
+++ b/ql/src/test/results/clientpositive/spark/mapreduce1.q.out
@@ -26,12 +26,14 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -70,6 +72,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+                    outputColumnNames: key, ten, one, value
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -84,6 +113,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1
 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value
diff --git a/ql/src/test/results/clientpositive/spark/mapreduce2.q.out b/ql/src/test/results/clientpositive/spark/mapreduce2.q.out
index adfb503..ea93af2 100644
--- a/ql/src/test/results/clientpositive/spark/mapreduce2.q.out
+++ b/ql/src/test/results/clientpositive/spark/mapreduce2.q.out
@@ -24,12 +24,14 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -67,6 +69,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.dest1
+                  Select Operator
+                    expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: string)
+                    outputColumnNames: key, ten, one, value
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(ten, 16), compute_stats(one, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1932 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -81,6 +110,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, ten, one, value
+          Column Types: int, int, int, string
+          Table: default.dest1
+
 PREHOOK: query: FROM src
 INSERT OVERWRITE TABLE dest1
 MAP src.key, CAST(src.key / 10 AS INT), CAST(src.key % 10 AS INT), src.value
diff --git a/ql/src/test/results/clientpositive/spark/merge1.q.out b/ql/src/test/results/clientpositive/spark/merge1.q.out
index d5b1e9f..015fd80 100644
--- a/ql/src/test/results/clientpositive/spark/merge1.q.out
+++ b/ql/src/test/results/clientpositive/spark/merge1.q.out
@@ -20,6 +20,7 @@ STAGE DEPENDENCIES:
DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -29,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -516,6 +552,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -523,6 +560,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +581,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -565,6 +631,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -626,6 +699,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -633,6 +707,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +728,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -675,6 +778,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.dest1 + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/merge2.q.out b/ql/src/test/results/clientpositive/spark/merge2.q.out index d780dc2..acb5c0d 100644 --- a/ql/src/test/results/clientpositive/spark/merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/merge2.q.out @@ -20,6 +20,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -29,6 +30,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -72,6 +74,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: key, val + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE 
Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +124,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, val + Column Types: int, int + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -516,6 +552,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -523,6 +560,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -542,6 +581,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -565,6 +631,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -626,6 +699,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -633,6 +707,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -652,6 +728,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: key + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -675,6 +778,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.test1 + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out index 79fc6ae..ae89612 100644 --- a/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out +++ b/ql/src/test/results/clientpositive/spark/metadata_only_queries.q.out @@ -183,52 +183,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), 
count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -239,52 +199,12 @@ POSTHOOK: query: explain select count(*), sum(1), sum(0.2), count(1), count(s), count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -295,56 +215,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was 
here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9999 Data size: 1030908 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -355,56 +231,12 @@ POSTHOOK: query: explain select count(*), '1' as one, sum(1), sum(0.2), 2 as two, count(1), count(s), 3+4.0 as three, count(bo), count(bin), count(si), max(i), min(b) from stats_tbl_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_tbl_part - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: s (type: string), bo (type: boolean), bin (type: binary), si (type: smallint), i (type: int), b (type: bigint) - outputColumnNames: _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 9489 Data size: 978785 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), sum(1), sum(0.2), count(1), count(_col2), 
count(_col3), count(_col4), count(_col5), max(_col6), min(_col7) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: decimal(11,1)), _col3 (type: bigint), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), count(VALUE._col3), count(VALUE._col4), count(VALUE._col5), count(VALUE._col6), count(VALUE._col7), max(VALUE._col8), min(VALUE._col9) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: bigint), '1' (type: string), _col1 (type: bigint), _col2 (type: decimal(11,1)), 2 (type: int), _col3 (type: bigint), _col4 (type: bigint), 7 (type: decimal(2,0)), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: int), _col9 (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/spark/multi_insert.q.out b/ql/src/test/results/clientpositive/spark/multi_insert.q.out index 33af962..477b6db 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert.q.out @@ -28,15 +28,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -56,6 +61,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -71,6 +89,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -85,6 +156,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -165,15 +250,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: 
                 TableScan
                   alias: src
@@ -193,6 +283,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((key > 10) and (key < 20)) (type: boolean)
                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -208,6 +311,59 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -222,6 +378,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -302,15 +472,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -330,6 +505,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((key > 10) and (key < 20)) (type: boolean)
                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -345,6 +533,59 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -359,6 +600,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -439,15 +694,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -467,6 +727,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((key > 10) and (key < 20)) (type: boolean)
                   Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -482,6 +755,59 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -496,6 +822,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -576,6 +916,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -583,7 +925,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -599,7 +944,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -619,6 +992,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -635,6 +1021,31 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Reducer 6
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -649,6 +1060,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -722,6 +1147,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -729,7 +1156,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -745,7 +1175,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -765,6 +1223,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -781,6 +1252,31 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Reducer 6
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -795,6 +1291,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -868,6 +1378,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -875,7 +1387,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -891,7 +1406,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -911,6 +1454,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -927,6 +1483,31 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Reducer 6
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -941,6 +1522,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1014,6 +1609,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -1021,7 +1618,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -1037,7 +1637,35 @@ STAGE PLANS:
                      sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -1057,6 +1685,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                   Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1073,6 +1714,31 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
+        Reducer 6
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -1087,6 +1753,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1160,15 +1840,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 5
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1188,6 +1873,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                   Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1199,7 +1897,7 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_multi2
-        Map 2
+        Map 6
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1209,27 +1907,49 @@ STAGE PLANS:
                   outputColumnNames: _col0, _col1
                   Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows:
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -1244,6 +1964,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1343,15 +2077,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1371,6 +2110,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1382,7 +2134,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1392,27 +2144,49 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Filter Operator
-                      predicate: (_col0 < 10) (type: boolean)
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi1
-                    Filter Operator
                       predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
                         Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi2
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -1610,6 +2438,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -1709,15 +2551,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1737,6 +2584,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1748,7 +2608,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1758,27 +2618,49 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                     Filter Operator
-                      predicate: (_col0 < 10) (type: boolean)
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi1
-                    Filter Operator
                       predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
                         Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi2
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -1793,6 +2675,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
index d8c4b7f..0e2f15f 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby.q.out
@@ -30,6 +30,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -37,7 +39,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -53,7 +58,35 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
@@ -78,6 +111,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: key, count
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: (KEY._col0 > 500) (type: boolean)
                   Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
@@ -99,6 +145,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 332 Data size: 3527 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (KEY._col0 > 500) (type: boolean)
+                  Statistics: Num rows: 110 Data size: 1168 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    keys: KEY._col0 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: int)
+                        outputColumnNames: key, count
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -113,6 +189,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -217,6 +307,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-1 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-0 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-0
 
@@ -224,7 +316,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -241,7 +336,35 @@ STAGE PLANS:
                       sort order: +
                       Map-reduce partition columns: key (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -263,6 +386,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: int)
+                      outputColumnNames: key, count
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Filter Operator
                   predicate: (KEY._col0 > 450) (type: boolean)
                   Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
@@ -284,6 +420,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: (KEY._col0 > 450) (type: boolean)
+                  Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: count()
+                    keys: KEY._col0 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: int)
+                        outputColumnNames: key, count
+                        Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(count, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-1
     Move Operator
@@ -298,6 +464,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, count
+          Column Types: string, int
+          Table: default.e2
+
   Stage: Stage-0
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
index 81b882a..1654ad7 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby2.q.out
@@ -32,6 +32,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -88,6 +90,26 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                  Select Operator
+                    expressions: _col0 (type: int)
+                    outputColumnNames: count
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(count, 16)
+                      mode: complete
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: struct)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                 Group By Operator
                   aggregations: percentile_approx(VALUE._col0, 0.5)
                   mode: complete
@@ -101,6 +123,26 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+                  Select Operator
+                    expressions: _col0 (type: double)
+                    outputColumnNames: percentile
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(percentile, 16)
+                      mode: complete
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: struct)
+                        outputColumnNames: _col0
+                        Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -115,6 +157,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: count
+          Column Types: int
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: percentile
+          Column Types: double
+          Table: default.e2
+
   Stage: Stage-1
     Move Operator
      tables:
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
index 92d10f4..ac6c8d3 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_gby3.q.out
@@ -40,6 +40,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -48,7 +50,10 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Reducer 7 (GROUP, 1)
+        Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -77,7 +82,35 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: double)
-        Reducer 3 
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -99,6 +132,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Group By Operator
                   aggregations: sum(VALUE._col0)
                   keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
@@ -117,6 +163,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+        Reducer 7 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                      outputColumnNames: key, keyd, value
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -131,6 +204,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -162,6 +249,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -170,7 +259,10 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Reducer 7 (GROUP, 1)
+        Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -199,7 +291,35 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: double)
-        Reducer 3 
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -221,6 +341,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Group By Operator
                   aggregations: sum(VALUE._col0)
                   keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
@@ -239,6 +372,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e2
+        Reducer 7 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                      outputColumnNames: key, keyd, value
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -253,6 +413,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -1598,6 +1772,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -1605,11 +1781,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1629,7 +1807,7 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 5 
+        Map 7 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1669,9 +1847,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Reduce Operator Tree:
               Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: count(DISTINCT KEY._col1:0._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
@@ -1689,6 +1894,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e3
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -1703,6 +1935,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e3
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -1738,6 +1984,9 @@ STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-4, Stage-5, Stage-6
+  Stage-8 depends on stages: Stage-4, Stage-5, Stage-6
+  Stage-9 depends on stages: Stage-4, Stage-5, Stage-6
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-3
 
@@ -1747,8 +1996,13 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 1 (SORT, 1)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 10 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 9 (GROUP, 1)
+        Reducer 5 <- Reducer 10 (GROUP, 1)
+        Reducer 6 <- Reducer 8 (GROUP, 1)
+        Reducer 7 <- Map 1 (SORT, 1)
+        Reducer 8 <- Map 1 (SORT, 1)
+        Reducer 9 <- Reducer 7 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1766,7 +2020,76 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string), _col1 (type: double), _col2 (type: string)
                       Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: double), _col2 (type: string)
-        Reducer 2 
+        Reducer 10 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: sum(VALUE._col0)
+                  keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col2 (type: double), _col1 (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                      outputColumnNames: key, keyd, value
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1460 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1464 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 980 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
             Reduce Operator Tree:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string)
@@ -1796,7 +2119,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e3
-        Reducer 3 
+        Reducer 8 
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: double), VALUE._col1 (type: string)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(DISTINCT _col1)
+                  keys: _col0 (type: string), _col1 (type: double), _col2 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), UDFToDouble(_col3) (type: double)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 9 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -1818,6 +2170,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.e1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: double)
+                      outputColumnNames: key, keyd
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(keyd, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Group By Operator
                   aggregations: sum(VALUE._col0)
                   keys: KEY._col0 (type: string), KEY._col1:0._col0 (type: string)
@@ -1850,6 +2215,27 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e1
+
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd, value
+          Column Types: string, double, string
+          Table: default.e2
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, keyd
+          Column Types: string, double
+          Table: default.e3
+
   Stage: Stage-1
     Move Operator
      tables:
diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
index 7b804da..9e975a7 100644
--- a/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
+++ b/ql/src/test/results/clientpositive/spark/multi_insert_lateral_view.q.out
@@ -48,15 +48,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 4 
            Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -82,6 +87,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Select Operator
                     expressions: array((key + 1),(key + 2)) (type: array)
                     outputColumnNames: _col0
@@ -104,6 +122,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Lateral View Forward
                   Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
@@ -147,6 +178,92 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: src_10
+                  Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                  Lateral View Forward
+                    Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: key
+                      Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      Lateral View Join Operator
+                        outputColumnNames: _col0, _col5
+                        Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: string), UDFToString(_col5) (type: string)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: string), _col1 (type: string)
+                            outputColumnNames: key, value
+                            Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                            Group By Operator
+                              aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                              mode: hash
+                              outputColumnNames: _col0, _col1
+                              Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                              Reduce Output Operator
+                                sort order: 
+                                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                                value expressions: _col0 (type: struct), _col1 (type: struct)
+                    Select Operator
+                      expressions: array((key + 3),(key + 4)) (type: array)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      UDTF Operator
+                        Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                        function name: explode
+                        Lateral View Join Operator
+                          outputColumnNames: _col0, _col5
+                          Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                          Select Operator
+                            expressions: _col0 (type: string), UDFToString(_col5) (type: string)
+                            outputColumnNames: _col0, _col1
+                            Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                            Select Operator
+                              expressions: _col0 (type: string), _col1 (type: string)
+                              outputColumnNames: key, value
+                              Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
+                              Group By Operator
+                                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                                mode: hash
+                                outputColumnNames: _col0, _col1
+                                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                                Reduce Output Operator
+                                  sort order: 
+                                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                                  value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -161,6 +278,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -262,6 +393,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -269,11 +402,13 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP, 2)
-        Reducer 3 <- Map 5 (GROUP, 2)
+        Reducer 2 <- Map 6 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 4 <- Map 7 (GROUP, 2)
+        Reducer 5 <- Reducer 4 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 4 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -321,7 +456,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col0 (type: string)
                       Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col1 (type: double)
-        Map 5 
+        Map 7 
            Map Operator Tree:
                 TableScan
                   alias: src_10
@@ -389,9 +524,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
         Reducer 3 
             Reduce Operator Tree:
              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
                 aggregations: sum(VALUE._col0)
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
@@ -409,6 +571,33 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_lv2
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 5 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -423,6 +612,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv1
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_lv2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -506,6 +709,9 @@ STAGE DEPENDENCIES:
   Stage-3 is a root stage
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-4, Stage-5, Stage-6
+  Stage-8 depends on stages: Stage-4, Stage-5, Stage-6
+  Stage-9 depends on stages: Stage-4, Stage-5, Stage-6
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-3
 
@@ -515,11 +721,15 @@ STAGE PLANS:
   Stage: Stage-3
     Spark
       Edges:
-        Reducer 2 <- Map 4 (GROUP, 2)
-        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 10 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 7 (GROUP, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+        Reducer 5 <- Reducer 9 (GROUP, 1)
+        Reducer 6 <- Reducer 10 (GROUP, 1)
+        Reducer 9 <- 
Map 8 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: - Map 4 + Map 7 Map Operator Tree: TableScan alias: src_10 @@ -567,7 +777,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: double) - Map 5 + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -581,6 +791,36 @@ STAGE PLANS: Map-reduce partition columns: key (type: string) Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE value expressions: value (type: string) + Reducer 10 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -601,8 +841,63 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: 
COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE Filter Operator @@ -626,6 +921,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -661,6 +969,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -771,6 +1100,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -780,12 +1112,35 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: src_10 + 
Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT key) + keys: value (type: string), key (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Map 8 Map Operator Tree: TableScan alias: src_10 @@ -831,7 +1186,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -877,26 +1232,6 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: key, value - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: sum(DISTINCT key) - keys: value (type: string), key (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Group By Operator @@ -917,9 +1252,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: sum(DISTINCT 
KEY._col1:0._col0) keys: KEY._col0 (type: double) mode: mergepartial @@ -937,7 +1299,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv2 - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: sum(DISTINCT KEY._col1:0._col0) @@ -957,6 +1346,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 5 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 992 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -971,6 +1387,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + Stage: Stage-1 Move Operator tables: @@ -1121,6 +1558,10 @@ STAGE DEPENDENCIES: Stage-4 is a root stage 
Stage-0 depends on stages: Stage-4 Stage-5 depends on stages: Stage-0 + Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-11 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 + Stage-12 depends on stages: Stage-5, Stage-6, Stage-7, Stage-8 Stage-1 depends on stages: Stage-4 Stage-6 depends on stages: Stage-1 Stage-2 depends on stages: Stage-4 @@ -1132,12 +1573,17 @@ STAGE PLANS: Stage: Stage-4 Spark Edges: - Reducer 2 <- Map 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 12 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 13 <- Map 11 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 7 <- Reducer 12 (GROUP, 1) + Reducer 8 <- Reducer 13 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan alias: src_10 @@ -1163,7 +1609,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 1),(key + 2)) (type: array) + expressions: array((key + 3),(key + 4)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1183,7 +1629,20 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 11 + Map Operator Tree: + TableScan + alias: src_10 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 200) or (key > 200)) (type: boolean) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string), key (type: string) + sort order: ++ + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Map 9 Map Operator Tree: TableScan alias: src_10 @@ -1209,7 +1668,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: array((key + 3),(key + 4)) (type: array) + expressions: array((key + 1),(key + 2)) (type: array) outputColumnNames: _col0 Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE UDTF Operator @@ -1229,60 +1688,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 208 Basic stats: COMPLETE Column stats: NONE - Map 7 - Map Operator Tree: - TableScan - alias: src_10 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((key < 200) or (key > 200)) (type: boolean) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: value (type: string), key (type: string) - sort order: ++ - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 
(type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv1 - Reducer 3 - Reduce Operator Tree: - Group By Operator - aggregations: sum(DISTINCT KEY._col1:0._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), UDFToString(_col1) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.src_lv2 - Reducer 4 + Reducer 12 Reduce Operator Tree: Forward Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE @@ -1307,6 +1713,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv3 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: (KEY._col1:0._col0 < 200) (type: boolean) Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE @@ -1328,6 +1747,158 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_lv4 + Reducer 13 + Reduce Operator Tree: + Forward + Statistics: Num rows: 6 Data size: 62 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (KEY._col1:0._col0 < 200) (type: boolean) + Statistics: Num rows: 2 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 
(type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToString(_col1) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 104 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.src_lv2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 10 Data size: 
104 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1252 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -1342,6 +1913,34 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv1 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv2 + + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv3 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_lv4 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out index 2b28d53..37911c5 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_mixed.q.out @@ -38,6 +38,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 
Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -47,13 +50,14 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 6 (GROUP, 2) + Reducer 2 <- Map 7 (GROUP, 2) Reducer 3 <- Reducer 2 (SORT, 1) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 4 <- Map 8 (GROUP, 2) Reducer 5 <- Reducer 4 (SORT, 1) + Reducer 6 <- Map 9 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 6 + Map 7 Map Operator Tree: TableScan alias: src @@ -89,7 +93,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi3 - Map 7 + Map 8 Map Operator Tree: TableScan alias: src @@ -110,6 +114,31 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) + Map 9 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < 10) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -137,6 +166,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 4 Reduce Operator Tree: Group By Operator @@ -164,6 +213,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: 
Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -178,6 +261,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi3 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out index 7f1d67b..ff53788 100644 --- a/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out +++ b/ql/src/test/results/clientpositive/spark/multi_insert_move_tasks_share_dependencies.q.out @@ -29,15 +29,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -57,6 +62,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: 
Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -72,6 +90,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -89,6 +160,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -170,15 +255,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) 
+ Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -198,6 +288,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -213,6 +316,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 
Dependency Collection @@ -230,6 +386,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -311,15 +481,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -339,6 +514,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -354,6 +542,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -371,6 +612,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -452,15 +707,20 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: src @@ -480,6 +740,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Filter Operator predicate: ((key > 10) and (key < 20)) (type: boolean) Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE @@ -495,6 +768,59 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_multi2 + Map 5 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 10) and (key < 20)) (type: boolean) + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-3 Dependency Collection @@ -512,6 +838,20 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: @@ -593,6 +933,8 @@ STAGE DEPENDENCIES: Stage-3 depends on stages: Stage-2 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-4, Stage-5 + Stage-7 depends on stages: Stage-4, Stage-5 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 @@ -600,7 +942,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -616,7 +961,35 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), 
+                  compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -636,6 +1009,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -652,6 +1038,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-3
     Dependency Collection
 
@@ -669,6 +1080,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -743,6 +1168,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -750,7 +1177,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -766,7 +1196,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -786,6 +1244,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -802,6 +1273,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE
+                      Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-3
     Dependency Collection
 
@@ -819,6 +1315,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -893,6 +1403,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -900,7 +1412,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -916,7 +1431,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -936,6 +1479,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 36 Data size: 382
+                        Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -952,6 +1508,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-3
     Dependency Collection
 
@@ -969,6 +1550,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -1043,6 +1638,8 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
@@ -1050,7 +1647,10 @@ STAGE PLANS:
   Stage: Stage-2
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 5 (GROUP, 1)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
@@ -1066,7 +1666,35 @@ STAGE PLANS:
                       sort order: ++
                       Map-reduce partition columns: key (type: string), value (type: string)
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2 
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE
+                    Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5 
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -1086,6 +1714,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 36 Data size: 382 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
                     Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
@@ -1102,6 +1743,31 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Reducer 6 
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((KEY._col0 > 10) and (KEY._col0 < 20)) (type: boolean)
+                  Statistics: Num rows: 24 Data size: 254 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                    mode: complete
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-3
     Dependency Collection
 
@@ -1119,6 +1785,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1193,15 +1873,20 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1221,6 +1906,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1232,7 +1930,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1242,27 +1940,49 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
 
@@ -1280,6 +2000,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1380,46 +2114,20 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
-    Spark
-#### A masked pattern was here ####
-      Vertices:
-        Map 1 
-            Map Operator Tree:
-                TableScan
-                  alias: src
-                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                  Select Operator
-                    expressions: key (type: string), value (type: string)
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-                    Filter Operator
-                      predicate: (_col0 < 10) (type: boolean)
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi1
-                    Filter Operator
-                      predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
-                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      File Output Operator
-                        compressed: false
-                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: default.src_multi2
-        Map 2 
+    Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1439,6
 +2147,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1450,6 +2171,59 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Filter Operator
+                      predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
+                      Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
 
@@ -1467,6 +2241,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1567,15 +2355,20 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1595,6 +2388,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1606,7 +2412,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1616,27 +2422,49 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort
 order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
 
@@ -1654,6 +2482,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -1754,15 +2596,20 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-6 depends on stages: Stage-4, Stage-5
+  Stage-7 depends on stages: Stage-4, Stage-5
   Stage-1 depends on stages: Stage-3
   Stage-5 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 5 (GROUP, 1), Map 5 (GROUP, 1)
+        Reducer 3 <- Map 6 (GROUP, 1), Map 6 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 5 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -1782,6 +2629,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
@@ -1793,7 +2653,7 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
-        Map 2 
+        Map 6 
             Map Operator Tree:
                 TableScan
                   alias: src
@@ -1803,27 +2663,49 @@ STAGE
 PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: (_col0 < 10) (type: boolean)
-                    Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
-                      Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi1
-                  Filter Operator
                     predicate: ((_col0 > 10) and (_col0 < 20)) (type: boolean)
                     Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                    File Output Operator
-                      compressed: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
                       Statistics: Num rows: 111 Data size: 1179 Basic stats: COMPLETE Column stats: NONE
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: default.src_multi2
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-3
     Dependency Collection
 
@@ -1841,6 +2723,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator
 
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -2784,6 +3680,8 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
+  Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
   Stage-1
 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -2793,11 +3691,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -2817,6 +3717,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -2832,6 +3745,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -2842,6 +3785,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -2873,7 +3844,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
             Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -2903,6 +3874,20 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
      tables:
@@ -3004,6 +3989,8 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
+  Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
   Stage-1 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -3013,11 +4000,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3037,6 +4026,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3052,6 +4054,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde:
 org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3062,6 +4094,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -3093,7 +4153,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
            Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -3123,6 +4183,20 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -3224,6 +4298,8 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
+  Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
   Stage-1 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -3233,11 +4309,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3257,6 +4335,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3272,6 +4363,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3282,6 +4403,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -3313,7 +4462,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
            Reduce Operator Tree:
               Select Operator
                 expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string)
@@ -3343,6 +4492,20 @@ STAGE PLANS:
   Stage: Stage-6
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_multi2
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -3444,6 +4607,8 @@ STAGE DEPENDENCIES:
   Stage-5 depends on stages: Stage-4
   Stage-0 depends on stages: Stage-5
   Stage-6 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
+  Stage-9 depends on stages: Stage-6, Stage-7, Stage-2, Stage-3
   Stage-1 depends on stages: Stage-5
   Stage-7 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-4
@@ -3453,11 +4618,13 @@ STAGE PLANS:
   Stage: Stage-4
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 6 (GROUP, 1)
+        Reducer 3 <- Map 7 (GROUP, 1)
+        Reducer 4 <- Map 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
-        Map 1 
+        Map 6 
            Map Operator Tree:
                 TableScan
                   alias: src
@@ -3477,6 +4644,19 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi1
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                   Filter Operator
                     predicate: ((key > 10) and (key < 20)) (type: boolean)
                     Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
@@ -3492,6 +4672,36 @@ STAGE PLANS:
                         output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                         serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                         name: default.src_multi2
+        Map 7 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: ((key > 10) and (key < 20)) (type: boolean)
+                    Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: key (type: string), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: _col0 (type: string), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 55 Data size: 584 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            sort order: 
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+        Map 8 
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((key > 10) and (key < 20)) or (key < 10)) (type: boolean)
                     Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
@@ -3502,6 +4712,34 @@ STAGE PLANS:
                       Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4 
+            Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 221 Data size: 2347 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
@@ -3533,7 +4771,7 @@ STAGE PLANS:
                       Map-reduce partition columns: _col1 (type: string)
                       Statistics: Num rows: 12 Data size: 127 Basic stats: COMPLETE Column stats: NONE
                       value expressions: _col0 (type: string)
-        Reducer 3 
+        Reducer 5 
            Reduce Operator Tree:
               Select Operator
expressions: VALUE._col0 (type: string), KEY.reducesinkkey0 (type: string) @@ -3563,6 +4801,20 @@ STAGE PLANS: Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi1 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_multi2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out index a146a8e..765c103 100644 --- a/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out +++ b/ql/src/test/results/clientpositive/spark/multigroupby_singlemr.q.out @@ -52,6 +52,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -59,11 +61,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -84,7 +88,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -125,9 +129,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -145,6 +176,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data 
size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -159,6 +217,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -186,6 +258,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -193,11 +267,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -218,7 +294,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col1 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -259,9 +335,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -279,6 +382,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -293,6 +423,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -320,6 +464,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -327,11 +473,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 4 (GROUP, 2) - Reducer 3 <- Map 5 (GROUP, 2) + Reducer 2 <- Map 6 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 4 + Map 6 Map Operator Tree: TableScan alias: tbl @@ -352,7 +500,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) - Map 5 + Map 7 Map Operator Tree: TableScan alias: tbl @@ -393,9 +541,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -413,6 +588,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -427,6 +629,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + Stage: Stage-1 Move Operator tables: @@ -454,6 +670,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 
depends on stages: Stage-1 @@ -461,7 +679,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 5 (GROUP, 1) + Reducer 4 <- Reducer 6 (GROUP, 1) + Reducer 5 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -479,7 +700,35 @@ STAGE PLANS: Map-reduce partition columns: c1 (type: int), c2 (type: int), c3 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: c4 (type: int) - Reducer 2 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Forward Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE @@ -501,6 +750,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) @@ -519,6 +781,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest4 + Reducer 6 + Reduce Operator Tree: + Forward + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col2 (type: int), KEY._col1 (type: int) + mode: 
complete + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col2 (type: int), _col1 (type: int), UDFToInteger(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Stage: Stage-0 Move Operator @@ -533,6 +822,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest4 + Stage: Stage-1 Move Operator tables: @@ -562,6 +865,9 @@ STAGE DEPENDENCIES: Stage-3 is a root stage Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-8 depends on stages: Stage-4, Stage-5, Stage-6 + Stage-9 depends on stages: Stage-4, Stage-5, Stage-6 Stage-1 depends on stages: Stage-3 Stage-5 depends on stages: Stage-1 Stage-2 depends on stages: Stage-3 @@ -571,12 +877,36 @@ STAGE PLANS: Stage: Stage-3 Spark Edges: - Reducer 2 <- Map 5 (GROUP, 2) - Reducer 3 <- Map 6 (GROUP, 2) - Reducer 4 <- Map 7 (GROUP, 2) + Reducer 2 <- Map 8 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 9 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) + Reducer 6 <- Map 10 (GROUP, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 5 + Map 10 + Map Operator Tree: + TableScan + alias: tbl + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Select Operator + expressions: c1 (type: int), c2 (type: int) + outputColumnNames: c1, c2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: count(c2) + keys: c1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col1 (type: bigint) + Map 8 Map Operator Tree: TableScan alias: tbl @@ -597,7 +927,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: int) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col3 (type: bigint) - Map 6 + Map 9 Map Operator Tree: TableScan alias: tbl @@ -618,27 +948,6 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) 
Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE value expressions: _col2 (type: bigint) - Map 7 - Map Operator Tree: - TableScan - alias: tbl - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Select Operator - expressions: c1 (type: int), c2 (type: int) - outputColumnNames: c1, c2 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Group By Operator - aggregations: count(c2) - keys: c1 (type: int) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE - value expressions: _col1 (type: bigint) Reducer 2 Reduce Operator Tree: Group By Operator @@ -659,9 +968,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest3 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col3 (type: int) + outputColumnNames: d1, d2, d3, d4 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16), compute_stats(d4, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1904 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1920 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: int), KEY._col1 (type: int) mode: mergepartial @@ -679,7 +1015,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 4 + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int) + outputColumnNames: d1, d2, d3 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16), compute_stats(d3, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -699,6 +1062,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: int) + outputColumnNames: d1, d2 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Group By Operator + aggregations: compute_stats(d1, 16), compute_stats(d2, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -713,6 +1103,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3, d4 + Column Types: int, int, int, int + Table: default.dest3 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: d1, d2, d3 + Column Types: int, int, int + Table: default.dest2 + + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: d1, d2 + Column Types: int, int + Table: default.dest1 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out index 1407616..b691854 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out @@ -54,10 +54,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -77,6 +80,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -94,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1 + PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part) SELECT key, value, PMOD(HASH(key), 2) as part FROM src @@ -129,6 +174,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -136,6 +182,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -155,6 +203,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + 
mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -181,6 +264,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1b + Stage: Stage-3 Spark #### A masked pattern was here #### @@ -252,6 +342,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -259,6 +350,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -278,6 +371,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string) + outputColumnNames: key, value, ds, part + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), part (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -304,6 +432,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + 
Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge1c + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out index b7f1a65..b4ac6cc 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out @@ -30,6 +30,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -37,6 +38,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -56,6 +59,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge2a + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string), UDFToString(_col3) (type: string) + outputColumnNames: key, value, one, two, three + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: one (type: string), two (type: string), three (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col3 (type: struct), _col4 (type: struct), _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -83,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge2a + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge3.q.out b/ql/src/test/results/clientpositive/spark/orc_merge3.q.out index 81a6013..189173a 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge3.q.out +++ 
b/ql/src/test/results/clientpositive/spark/orc_merge3.q.out @@ -60,6 +60,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -67,6 +68,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -86,6 +89,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -109,6 +139,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge4.q.out b/ql/src/test/results/clientpositive/spark/orc_merge4.q.out index 8d433b0..eb2aae4 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge4.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge4.q.out @@ -78,6 +78,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -85,6 +86,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,6 +107,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orcfile_merge3b + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 94000 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -127,6 +157,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.orcfile_merge3b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge5.q.out b/ql/src/test/results/clientpositive/spark/orc_merge5.q.out index 1f8c869..6c8c83c 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge5.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +60,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -71,6 +101,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: 
default.orc_merge5b + PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -115,6 +152,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -122,6 +160,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -145,6 +185,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5b + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp) + outputColumnNames: userid, string1, subtype, decimal1, ts + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -168,6 +235,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5b + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge6.q.out b/ql/src/test/results/clientpositive/spark/orc_merge6.q.out index be62fae..359341a 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge6.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -57,6 +60,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 
(type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -74,6 +112,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (year="2000",hour=24) select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -160,6 +205,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -167,6 +213,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -190,6 +238,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), '2000' (type: string), UDFToInteger('24') (type: int) + outputColumnNames: userid, string1, subtype, decimal1, ts, year, hour + Statistics: Num 
rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: year (type: string), hour (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: int) + Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: string), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 153 Data size: 41022 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -216,6 +299,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + Stage: Stage-3 Spark #### A masked pattern was here #### diff --git a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out index 01e3eac..d0daebd 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge7.q.out @@ -30,10 +30,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -53,6 +56,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 
16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: userid, string1, subtype, decimal1, ts + Column Types: bigint, string, double, decimal(10,0), timestamp + Table: default.orc_merge5a + PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 PREHOOK: type: QUERY PREHOOK: Input: default@orc_merge5 @@ -194,6 +239,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -201,6 +247,8 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -220,6 +268,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_merge5a + Select Operator + expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double) + outputColumnNames: userid, string1, subtype, decimal1, ts, st + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16) + keys: st (type: double) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 919 Data size: 246402 Basic 
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -245,6 +328,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
+
   Stage: Stage-3
     Spark
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
index 1407616..b691854 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out
@@ -54,10 +54,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -77,6 +80,41 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.orcfile_merge1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                  outputColumnNames: key, value, ds, part
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    keys: ds (type: string), part (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -94,6 +132,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1
+
 PREHOOK: query: INSERT OVERWRITE TABLE orcfile_merge1 PARTITION (ds='1', part)
     SELECT key, value, PMOD(HASH(key), 2) as part
 FROM src
@@ -129,6 +174,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -136,6 +182,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -155,6 +203,41 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.orcfile_merge1b
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                  outputColumnNames: key, value, ds, part
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    keys: ds (type: string), part (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -181,6 +264,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1b
+
   Stage: Stage-3
     Spark
 #### A masked pattern was here ####
@@ -252,6 +342,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -259,6 +350,8 @@ STAGE DEPENDENCIES:
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -278,6 +371,41 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.orcfile_merge1c
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), UDFToString(_col2) (type: string)
+                  outputColumnNames: key, value, ds, part
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    keys: ds (type: string), part (type: string)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: struct), _col3 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -304,6 +432,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.orcfile_merge1c
+
   Stage: Stage-3
     Spark
 #### A masked pattern was here ####
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out
index 65790c4..0f214f6 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_incompat1.q.out
@@ -30,10 +30,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -56,6 +59,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.orc_merge5b
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp)
+                  outputColumnNames: userid, string1, subtype, decimal1, ts
+                  Statistics: Num rows: 306 Data size: 82044 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                    Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 2604 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2620 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -70,6 +100,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5b
+
 PREHOOK: query: insert overwrite table orc_merge5b select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_merge5
diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
index 52973c8..d861d47 100644
--- a/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
+++ b/ql/src/test/results/clientpositive/spark/orc_merge_incompat2.q.out
@@ -30,10 +30,13 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -53,6 +56,41 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                       serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                       name: default.orc_merge5a
+                Select Operator
+                  expressions: _col0 (type: bigint), _col1 (type: string), _col2 (type: double), _col3 (type: decimal(10,0)), _col4 (type: timestamp), _col5 (type: double)
+                  outputColumnNames: userid, string1, subtype, decimal1, ts, st
+                  Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(userid, 16), compute_stats(string1, 16), compute_stats(subtype, 16), compute_stats(decimal1, 16), compute_stats(ts, 16)
+                    keys: st (type: double)
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                    Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: double)
+                      Statistics: Num rows: 919 Data size: 246402 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+                keys: KEY._col0 (type: double)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                  Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 459 Data size: 123066 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -69,6 +107,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: userid, string1, subtype, decimal1, ts
+          Column Types: bigint, string, double, decimal(10,0), timestamp
+          Table: default.orc_merge5a
+
 PREHOOK: query: insert overwrite table orc_merge5a partition (st) select userid,string1,subtype,decimal1,ts,subtype from orc_merge5 order by userid
 PREHOOK: type: QUERY
 PREHOOK: Input: default@orc_merge5
diff --git a/ql/src/test/results/clientpositive/spark/parallel.q.out b/ql/src/test/results/clientpositive/spark/parallel.q.out
index e31fcf0..a0847e4 100644
--- a/ql/src/test/results/clientpositive/spark/parallel.q.out
+++ b/ql/src/test/results/clientpositive/spark/parallel.q.out
@@ -28,6 +28,8 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
@@ -36,7 +38,10 @@ STAGE PLANS:
     Spark
       Edges:
         Reducer 2 <- Map 1 (GROUP, 2)
-        Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 6 (GROUP, 1)
+        Reducer 5 <- Reducer 7 (GROUP, 1)
+        Reducer 6 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 7 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -70,7 +75,35 @@ STAGE PLANS:
                     sort order: ++
                     Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
                     Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
-        Reducer 3
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 6
             Reduce Operator Tree:
               Forward
                 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
@@ -87,6 +120,19 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_a
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                 Group By Operator
                   keys: KEY._col0 (type: string), KEY._col1 (type: string)
                   mode: complete
@@ -100,6 +146,28 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.src_b
+        Reducer 7
+            Reduce Operator Tree:
+              Forward
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  keys: KEY._col0 (type: string), KEY._col1 (type: string)
+                  mode: complete
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: _col0 (type: string), _col1 (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order:
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: struct), _col1 (type: struct)
 
   Stage: Stage-0
     Move Operator
@@ -114,6 +182,20 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_a
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.src_b
+
   Stage: Stage-1
     Move Operator
       tables:
diff --git a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
index 7fdd48d..4be8e0c 100644
--- a/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/parallel_join1.q.out
@@ -18,12 +18,14 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
 
 STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 3 (PARTITION-LEVEL SORT, 4)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 4), Map 4 (PARTITION-LEVEL SORT, 4)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -43,7 +45,7 @@ STAGE PLANS:
                     sort order: +
                     Map-reduce partition columns: _col0 (type: string)
                     Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 3
+        Map 4
             Map Operator Tree:
                 TableScan
                   alias: src2
@@ -83,6 +85,33 @@ STAGE PLANS:
                       output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                       name: default.dest_j1
+                Select Operator
+                  expressions: _col0 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-0
     Move Operator
@@ -97,6 +126,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j1
+
 PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
 PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/pcr.q.out b/ql/src/test/results/clientpositive/spark/pcr.q.out
index 77ac020..809754a 100644
--- a/ql/src/test/results/clientpositive/spark/pcr.q.out
+++ b/ql/src/test/results/clientpositive/spark/pcr.q.out
@@ -96,7 +96,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -143,7 +143,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-09
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -291,7 +291,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -338,7 +338,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-09
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -385,7 +385,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-10
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -571,7 +571,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -618,7 +618,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-09
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -768,7 +768,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -815,7 +815,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-10
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -967,7 +967,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1014,7 +1014,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-09
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1061,7 +1061,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-10
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1224,7 +1224,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1271,7 +1271,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-09
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1318,7 +1318,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-10
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns key,value
@@ -1488,7 +1488,7 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
{"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1535,7 +1535,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1668,7 +1668,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1715,7 +1715,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1888,7 +1888,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1935,7 +1935,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -1982,7 +1982,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2182,7 +2182,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2229,7 +2229,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2370,7 +2370,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2445,7 +2445,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2639,7 +2639,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2714,7 +2714,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2917,7 +2917,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -2964,7 +2964,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3011,7 +3011,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3058,7 +3058,7 @@ STAGE PLANS: partition values: ds 2000-04-11 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3243,7 +3243,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3290,7 +3290,7 @@ STAGE PLANS: partition values: ds 2000-04-09 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3337,7 +3337,7 @@ STAGE PLANS: partition values: ds 2000-04-10 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3501,15 +3501,20 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-2 Spark + Edges: + Reducer 2 <- Map 4 (GROUP, 1) + Reducer 3 <- Map 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 4 Map Operator Tree: TableScan alias: pcr_t1 @@ -3551,6 +3556,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3598,7 +3619,7 @@ STAGE PLANS: partition values: ds 2000-04-08 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} bucket_count -1 column.name.delimiter , columns key,value @@ -3639,6 +3660,144 @@ STAGE PLANS: name: default.pcr_t1 Truncated Path -> Alias: /pcr_t1/ds=2000-04-08 
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: pcr_t1
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Select Operator
+                    expressions: key (type: int), value (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order:
+                          sort order:
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: ds=2000-04-08
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2000-04-08
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+                    bucket_count -1
+                    column.name.delimiter ,
+                    columns key,value
+                    columns.comments
+                    columns.types int:string
+#### A masked pattern was here ####
+                    name default.pcr_t1
+                    numFiles 1
+                    numRows 20
+                    partition_columns ds
+                    partition_columns.types string
+                    rawDataSize 160
+                    serialization.ddl struct pcr_t1 { i32 key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    totalSize 180
+#### A masked pattern was here ####
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    properties:
+                      bucket_count -1
+                      column.name.delimiter ,
+                      columns key,value
+                      columns.comments
+                      columns.types int:string
+#### A masked pattern was here ####
+                      name default.pcr_t1
+                      partition_columns ds
+                      partition_columns.types string
+                      serialization.ddl struct pcr_t1 { i32 key, string value}
+                      serialization.format 1
+                      serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.pcr_t1
+                  name: default.pcr_t1
+            Truncated Path -> Alias:
+              /pcr_t1/ds=2000-04-08 [pcr_t1]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -3672,6 +3831,22 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.pcr_t2
+          Is Table Level Stats: true
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.pcr_t3
+          Is Table Level Stats: true
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -3738,15 +3913,20 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
   Stage-1 depends on stages: Stage-2
   Stage-4 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-2
     Spark
+      Edges:
+        Reducer 2 <- Map 4 (GROUP, 1)
+        Reducer 3 <- Map 5 (GROUP, 1)
 #### A masked pattern was here ####
       Vertices:
-        Map 1
+        Map 4
            Map Operator Tree:
                TableScan
                  alias: pcr_t1
@@ -3771,7 +3951,7 @@ STAGE PLANS:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      properties:
-                        COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                        COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                        bucket_count -1
                        column.name.delimiter ,
                        columns key,value
@@ -3792,6 +3972,22 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+                Select Operator
+                  expressions: 2 (type: int), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      null sort order:
+                      sort order:
+                      Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                      tag: -1
+                      value expressions: _col0 (type: struct), _col1 (type: struct)
+                      auto parallelism: false
                 Filter Operator
                   isSamplingPred: false
                   predicate: (key = 3) (type: boolean)
                   Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
@@ -3811,7 +4007,7 @@ STAGE PLANS:
                      input format: org.apache.hadoop.mapred.TextInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      properties:
-                        COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                        COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                        bucket_count -1
                        column.name.delimiter ,
                        columns key,value
@@ -3843,7 +4039,89 @@ STAGE PLANS:
           partition values:
             ds 2000-04-08
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
+            bucket_count -1
+            column.name.delimiter ,
+            columns key,value
+            columns.comments
+            columns.types int:string
+#### A masked pattern was here ####
+            name default.pcr_t1
+            numFiles 1
+            numRows 20
+            partition_columns ds
+            partition_columns.types string
+            rawDataSize 160
+            serialization.ddl struct pcr_t1 { i32 key, string value}
+            serialization.format 1
+            serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            totalSize 180
+#### A masked pattern was here ####
+          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              column.name.delimiter ,
+              columns key,value
+              columns.comments
+              columns.types int:string
+#### A masked pattern was here ####
+              name default.pcr_t1
+              partition_columns ds
+              partition_columns.types string
+              serialization.ddl struct pcr_t1 { i32 key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.pcr_t1
+          name: default.pcr_t1
+        Truncated Path -> Alias:
+          /pcr_t1/ds=2000-04-08 [pcr_t1]
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: pcr_t1
+                  Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: NONE
+                  GatherStats: false
+                  Filter Operator
+                    isSamplingPred: false
+                    predicate: (key = 3) (type: boolean)
+                    Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: 3 (type: int), value (type: string)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                      Select Operator
+                        expressions: 3 (type: int), _col1 (type: string)
+                        outputColumnNames: key, value
+                        Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+                        Group By Operator
+                          aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          Reduce Output Operator
+                            null sort order:
+                            sort order:
+                            Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                            tag: -1
+                            value expressions: _col0 (type: struct), _col1 (type: struct)
+                            auto parallelism: false
+            Path -> Alias:
+#### A masked pattern was here ####
+            Path -> Partition:
+#### A masked pattern was here ####
+                Partition
+                  base file name: ds=2000-04-08
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  partition values:
+                    ds 2000-04-08
+                  properties:
+                    COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                        bucket_count -1
                        column.name.delimiter ,
                        columns key,value
@@ -3884,6 +4162,66 @@ STAGE PLANS:
                 name: default.pcr_t1
             Truncated Path -> Alias:
               /pcr_t1/ds=2000-04-08 [pcr_t1]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
@@ -3894,7 +4232,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
-                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                   bucket_count -1
                   column.name.delimiter ,
                   columns key,value
@@ -3917,6 +4255,22 @@ STAGE PLANS:
     Stats-Aggr Operator
 #### A masked pattern was here ####
 
+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.pcr_t2
+          Is Table Level Stats: true
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.pcr_t3
+          Is Table Level Stats: true
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -3926,7 +4280,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.TextInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                 properties:
-                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+                  COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
                   bucket_count -1
                   column.name.delimiter ,
                   columns key,value
diff --git a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
index 2b05a1c..d5624ad 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_join5.q.out
@@ -32,7 +32,7 @@ POSTHOOK: Lineage: t1.id1 SIMPLE []
 POSTHOOK: Lineage: t1.id2 SIMPLE []
 POSTHOOK: Lineage: t2.d SIMPLE []
 POSTHOOK: Lineage: t2.id SIMPLE []
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain
 select a.*,b.d d1,c.d d2 from
   t1 a join t2 b on (a.id1 = b.id)
@@ -51,8 +51,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -68,70 +68,71 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      sort order:
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string), _col1 (type: string)
         Map 4
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((d <= 1) and id is not null) (type: boolean)
+                    predicate: (d <= 1) (type: boolean)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: id (type: string), d (type: int)
-                      outputColumnNames: _col0, _col1
+                      expressions: d (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string), _col0 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
+                        sort order:
                         Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int)
+                        value expressions: _col0 (type: int)
         Map 5
            Map Operator Tree:
                TableScan
-                  alias: c
+                  alias: b
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (d <= 1) (type: boolean)
+                    predicate: ((d <= 1) and id is not null) (type: boolean)
                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                      expressions: d (type: int)
-                      outputColumnNames: _col0
+                      expressions: id (type: string), d (type: int)
+                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
-                        sort order:
+                        key expressions: _col0 (type: string), _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
                        Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                        value expressions: _col1 (type: int)
         Reducer 2
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string), _col1 (type: string)
-                  1 _col0 (type: string), _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  0
+                  1
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int)
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: int)
         Reducer 3
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0
-                  1
-                outputColumnNames: _col0, _col1, _col3, _col4
+                  0 _col0 (type: string), _col1 (type: string)
+                  1 _col0 (type: string), _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col4
                 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
-                  expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int)
+                  expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3
                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
@@ -148,7 +149,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: explain
 select * from (
 select a.*,b.d d1,c.d d2 from
@@ -171,8 +172,8 @@ STAGE PLANS:
   Stage: Stage-1
     Spark
       Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 1), Reducer 2 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Map 5 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2)
 #### A masked pattern was here ####
       Vertices:
         Map 1
@@ -188,73 +189,74 @@ STAGE PLANS:
                     outputColumnNames: _col0, _col1
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Reduce Output Operator
-                      key expressions: _col0 (type: string), _col1 (type: string)
-                      sort order: ++
-                      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                      sort order:
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: string), _col1 (type: string)
         Map 4
             Map Operator Tree:
                 TableScan
-                  alias: b
+                  alias: c
                   Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
-                    predicate: ((d <= 1) and id is not null) (type: boolean)
+                    predicate: (d <= 1) (type: boolean)
                     Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: id (type: string), d (type: int)
-                      outputColumnNames: _col0, _col1
+                      expressions: d (type: int)
+                      outputColumnNames: _col0
                       Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
-                        key expressions: _col0 (type: string), _col0 (type: string)
-                        sort order: ++
-                        Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
+                        sort order:
                         Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col1 (type: int)
+                        value expressions: _col0 (type: int)
         Map 5
            Map Operator Tree:
                TableScan
-                  alias: c
+                  alias: b
                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                  Filter Operator
-                    predicate: (d <= 1) (type: boolean)
+                    predicate: ((d <= 1) and id is not null) (type: boolean)
                    Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                    Select Operator
-                      expressions: d (type: int)
-                      outputColumnNames: _col0
+                      expressions: id (type: string), d (type: int)
+                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
                      Reduce Output Operator
-                        sort order:
+                        key expressions: _col0 (type: string), _col0 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string), _col0 (type: string)
                        Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                        value expressions: _col0 (type: int)
+                        value expressions: _col1 (type: int)
         Reducer 2
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0 _col0 (type: string), _col1 (type: string)
-                  1 _col0 (type: string), _col0 (type: string)
-                outputColumnNames: _col0, _col1, _col3
-                Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+                  0
+                  1
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
-                  sort order:
-                  Statistics: Num rows: 1 Data size: 3 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int)
+                  key expressions: _col0 (type: string), _col1 (type: string)
+                  sort order: ++
+                  Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+                  Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col2 (type: int)
         Reducer 3
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
                keys:
-                  0
-                  1
-                outputColumnNames: _col0, _col1, _col3, _col4
+                  0 _col0 (type: string), _col1 (type: string)
+                  1 _col0 (type: string), _col0 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col4
                 Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                 Filter Operator
-                  predicate: ((_col3 > 1) or (_col4 > 1)) (type: boolean)
+                  predicate: ((_col4 > 1) or (_col2 > 1)) (type: boolean)
                   Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
-                    expressions: _col0 (type: string), _col1 (type: string), _col3 (type: int), _col4 (type: int)
+                    expressions: _col0 (type: string), _col1 (type: string), _col4 (type: int), _col2 (type: int)
                     outputColumnNames: _col0, _col1, _col2, _col3
                     Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
@@ -271,7 +273,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-Warning: Shuffle Join JOIN[14][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 3' is a cross product
+Warning: Shuffle Join JOIN[11][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product
 PREHOOK: query: select * from (
 select a.*,b.d d1,c.d d2 from
   t1 a join t2 b on (a.id1 = b.id)
diff --git a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out
index 12b1724..def99b2 100644
--- a/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out
+++ b/ql/src/test/results/clientpositive/spark/ppd_multi_insert.q.out
@@ -48,6 +48,9 @@ STAGE DEPENDENCIES:
   Stage-4 is a root stage
   Stage-0 depends on stages: Stage-4
   Stage-5 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
+  Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
+  Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
  Stage-1 depends on stages: Stage-4
  Stage-6 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-4
@@ -58,7 +61,12 @@ STAGE PLANS:
  Stage: Stage-4
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 7 (GROUP, 1)
+        Reducer 4 <- Reducer 8 (GROUP, 1)
+        Reducer 5 <- Reducer 9 (GROUP, 2)
+        Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -75,7 +83,7 @@ STAGE PLANS:
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
-        Map 3
+        Map 6
            Map Operator Tree:
                TableScan
                  alias: b
@@ -88,14 +96,58 @@ STAGE PLANS:
                      sort order: +
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: '2008-04-08' (type: string), '12' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
-                keys:
-                  0 key (type: string)
-                  1 key (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
@@ -113,6 +165,19 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.mi1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                Filter Operator
                  predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean)
                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
@@ -157,6 +222,63 @@ STAGE PLANS:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean)
+                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 9
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean)
+                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16)
+                        keys: '2008-04-08' (type: string), '12' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '2008-04-08' (type: string), '12' (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string)
+                          Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: struct)

  Stage: Stage-0
    Move Operator
@@ -171,6 +293,27 @@ STAGE PLANS:
  Stage: Stage-5
    Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.mi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.mi2
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.mi3
+
  Stage: Stage-1
    Move Operator
      tables:
@@ -1321,6 +1464,9 @@ STAGE DEPENDENCIES:
  Stage-4 is a root stage
  Stage-0 depends on stages: Stage-4
  Stage-5 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
+  Stage-9 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
+  Stage-10 depends on stages: Stage-5, Stage-6, Stage-7, Stage-3
  Stage-1 depends on stages: Stage-4
  Stage-6 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-4
@@ -1331,7 +1477,12 @@ STAGE PLANS:
  Stage: Stage-4
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 7 (GROUP, 1)
+        Reducer 4 <- Reducer 8 (GROUP, 1)
+        Reducer 5 <- Reducer 9 (GROUP, 2)
+        Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
+        Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -1348,7 +1499,7 @@ STAGE PLANS:
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
                      value expressions: value (type: string)
-        Map 3
+        Map 6
            Map Operator Tree:
                TableScan
                  alias: b
@@ -1361,14 +1512,58 @@ STAGE PLANS:
                      sort order: +
                      Map-reduce partition columns: key (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Reducer 2
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 4
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: '2008-04-08' (type: string), '12' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col2 (type: struct), '2008-04-08' (type: string), '12' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 30 Data size: 318 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7
            Reduce Operator Tree:
              Join Operator
                condition map:
                     Inner Join 0 to 1
-                keys:
-                  0 key (type: string)
-                  1 key (type: string)
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
                Filter Operator
@@ -1386,6 +1581,19 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.mi1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 183 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
                Filter Operator
                  predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean)
                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
@@ -1430,6 +1638,63 @@ STAGE PLANS:
                        input format: org.apache.hadoop.mapred.TextInputFormat
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col0 >= 100) and (_col0 < 200)) (type: boolean)
+                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int), _col1 (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 9
+            Reduce Operator Tree:
+              Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Filter Operator
+                  predicate: ((_col0 >= 200) and (_col0 < 300)) (type: boolean)
+                  Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: UDFToInteger(_col0) (type: int)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: _col0 (type: int)
+                      outputColumnNames: key
+                      Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16)
+                        keys: '2008-04-08' (type: string), '12' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '2008-04-08' (type: string), '12' (type: string)
+                          sort order: ++
+                          Map-reduce partition columns: '2008-04-08' (type: string), '12' (type: string)
+                          Statistics: Num rows: 61 Data size: 648 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: struct)

  Stage: Stage-0
    Move Operator
@@ -1444,6 +1709,27 @@ STAGE PLANS:
  Stage: Stage-5
    Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.mi1
+
+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.mi2
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.mi3
+
  Stage: Stage-1
    Move Operator
      tables:
diff --git a/ql/src/test/results/clientpositive/spark/ptf.q.out b/ql/src/test/results/clientpositive/spark/ptf.q.out
index 2e31fbd..5146bc8 100644
--- a/ql/src/test/results/clientpositive/spark/ptf.q.out
+++ b/ql/src/test/results/clientpositive/spark/ptf.q.out
@@ -2865,6 +2865,8 @@ STAGE DEPENDENCIES:
  Stage-2 is a root stage
  Stage-0 depends on stages: Stage-2
  Stage-3 depends on stages: Stage-0
+  Stage-5 depends on stages: Stage-3, Stage-4
+  Stage-6 depends on stages: Stage-3, Stage-4
  Stage-1 depends on stages: Stage-2
  Stage-4 depends on stages: Stage-1
@@ -2872,11 +2874,13 @@ STAGE PLANS:
  Stage: Stage-2
    Spark
      Edges:
-        Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2)
-        Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2)
-        Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2)
-        Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2)
-        Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 8 (PARTITION-LEVEL SORT, 2)
+        Reducer 4 <- Reducer 3 (GROUP, 1)
+        Reducer 5 <- Reducer 9 (PARTITION-LEVEL SORT, 2)
+        Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 2)
+        Reducer 7 <- Reducer 6 (GROUP, 1)
+        Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2)
+        Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -2942,8 +2946,35 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.part_4
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double)
+                      outputColumnNames: p_mfgr, p_name, p_size, r, dr, s
+                      Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                        Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
        Reducer 4
            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 5
+            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col1 (type: string), KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: int)
                outputColumnNames: _col1, _col2, _col5
@@ -2978,7 +3009,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col2 (type: string)
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    value expressions: sum_window_0 (type: bigint), _col5 (type: int)
-        Reducer 5
+        Reducer 6
            Reduce Operator Tree:
              Select Operator
                expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col4 (type: int)
@@ -3037,7 +3068,34 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.part_5
-        Reducer 6
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int)
+                      outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
+                      Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                        Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
+        Reducer 7
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 8
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3061,7 +3119,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col2 (type: string)
                    Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col5 (type: int), _col7 (type: double)
-        Reducer 7
+        Reducer 9
            Reduce Operator Tree:
              Select Operator
                expressions: KEY.reducesinkkey1 (type: string), KEY.reducesinkkey0 (type: string), VALUE._col3 (type: int), VALUE._col5 (type: double)
@@ -3099,6 +3157,20 @@ STAGE PLANS:
  Stage: Stage-3
    Stats-Aggr Operator

+  Stage: Stage-5
+    Column Stats Work
+      Column Stats Desc:
+          Columns: p_mfgr, p_name, p_size, r, dr, s
+          Column Types: string, string, int, int, int, double
+          Table: default.part_4
+
+  Stage: Stage-6
+    Column Stats Work
+      Column Stats Desc:
+          Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1
+          Column Types: string, string, int, int, int, int, double, int
+          Table: default.part_5
+
  Stage: Stage-1
    Move Operator
      tables:
diff --git a/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out b/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out
index 93b026f..5a31ce7 100644
--- a/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out
+++ b/ql/src/test/results/clientpositive/spark/reduce_deduplicate.q.out
@@ -18,6 +18,7 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
@@ -136,6 +137,41 @@ STAGE PLANS:
                      TotalFiles: 2
                      GatherStats: true
                      MultiFileSpray: true
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: complete
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                        Select Operator
+                          expressions: _col0 (type: struct), _col1 (type: struct)
+                          outputColumnNames: _col0, _col1
+                          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 0
+#### A masked pattern was here ####
+                            NumFilesPerFileSink: 1
+                            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                            table:
+                                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                                properties:
+                                  columns _col0,_col1
+                                  columns.types struct:struct
+                                  escape.delim \
+                                  hive.serialization.extend.additional.nesting.levels true
+                                  serialization.escape.crlf true
+                                  serialization.format 1
+                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            TotalFiles: 1
+                            GatherStats: false
+                            MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -170,6 +206,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.bucket5_1
+          Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucket5_1
select * from src cluster by key
PREHOOK: type: QUERY
@@ -252,12 +296,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -346,6 +392,60 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: bigint), _col5 (type: string), _col6 (type: string), '2010-03-29' (type: string)
+                      outputColumnNames: aid, bid, t, ctime, etime, l, et, ds
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(aid, 16), compute_stats(bid, 16), compute_stats(t, 16), compute_stats(ctime, 16), compute_stats(etime, 16), compute_stats(l, 16), compute_stats(et, 16)
+                        keys: '2010-03-29' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '2010-03-29' (type: string)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: '2010-03-29' (type: string)
+                          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                          tag: -1
+                          value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct)
+                          auto parallelism: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6)
+                keys: '2010-03-29' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), '2010-03-29' (type: string)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7
+                          columns.types struct:struct:struct:struct:struct:struct:struct:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -378,3 +478,11 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: aid, bid, t, ctime, etime, l, et
+          Column Types: string, string, int, string, bigint, string, string
+          Table: default.complex_tbl_1
+          Is Table Level Stats: false
+
diff --git a/ql/src/test/results/clientpositive/spark/sample1.q.out b/ql/src/test/results/clientpositive/spark/sample1.q.out
index b44a0e8..c03a4ed 100644
--- a/ql/src/test/results/clientpositive/spark/sample1.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample1.q.out
@@ -20,10 +20,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -72,6 +75,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string), '2008-04-08' (type: string), '11' (type: string)
+                      outputColumnNames: key, value, dt, hr
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16), compute_stats(dt, 16), compute_stats(hr, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3
+                        Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 1952 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -125,6 +144,36 @@ STAGE PLANS:
                    name: default.srcpart
            Truncated Path -> Alias:
              /srcpart/ds=2008-04-08/hr=11 [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 1956 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1,_col2,_col3
+                        columns.types struct:struct:struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -158,6 +207,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value, dt, hr
+          Column Types: int, string, string, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.*
FROM srcpart TABLESAMPLE (BUCKET 1 OUT OF 1 ON rand()) s
WHERE s.ds='2008-04-08' and s.hr='11'
diff --git a/ql/src/test/results/clientpositive/spark/sample10.q.out b/ql/src/test/results/clientpositive/spark/sample10.q.out
index d589216..40a7392 100644
--- a/ql/src/test/results/clientpositive/spark/sample10.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample10.q.out
@@ -93,7 +93,7 @@ STAGE PLANS:
              ds 2008-04-08
              hr 11
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
@@ -143,7 +143,7 @@ STAGE PLANS:
              ds 2008-04-08
              hr 12
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
@@ -193,7 +193,7 @@ STAGE PLANS:
              ds 2008-04-09
              hr 11
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
@@ -243,7 +243,7 @@ STAGE PLANS:
              ds 2008-04-09
              hr 12
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 4
              bucket_field_name key
              column.name.delimiter ,
diff --git a/ql/src/test/results/clientpositive/spark/sample2.q.out b/ql/src/test/results/clientpositive/spark/sample2.q.out
index c109918..aa36593 100644
--- a/ql/src/test/results/clientpositive/spark/sample2.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample2.q.out
@@ -18,10 +18,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -71,6 +74,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -124,6 +143,36 @@ STAGE PLANS:
                    name: default.srcbucket
            Truncated Path -> Alias:
              /srcbucket/000000_0 [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -157,6 +206,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.*
FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2) s
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/sample4.q.out b/ql/src/test/results/clientpositive/spark/sample4.q.out
index 31b9aa8..662f035 100644
--- a/ql/src/test/results/clientpositive/spark/sample4.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample4.q.out
@@ -18,10 +18,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -71,6 +74,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -124,6 +143,36 @@ STAGE PLANS:
                    name: default.srcbucket
            Truncated Path -> Alias:
              /srcbucket/000000_0 [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -157,6 +206,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.*
FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 2 on key) s
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/sample5.q.out b/ql/src/test/results/clientpositive/spark/sample5.q.out
index 71e0f42..768fae8 100644
--- a/ql/src/test/results/clientpositive/spark/sample5.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample5.q.out
@@ -20,10 +20,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -72,6 +75,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -125,6 +144,36 @@ STAGE PLANS:
                    name: default.srcbucket
            Truncated Path -> Alias:
              /srcbucket [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -158,6 +207,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1
SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 5 on key) s
diff --git a/ql/src/test/results/clientpositive/spark/sample6.q.out b/ql/src/test/results/clientpositive/spark/sample6.q.out
index 37545af..7d43246 100644
--- a/ql/src/test/results/clientpositive/spark/sample6.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample6.q.out
@@ -18,10 +18,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -71,6 +74,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 500 Data size: 5301 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -124,6 +143,36 @@ STAGE PLANS:
                    name: default.srcbucket
            Truncated Path -> Alias:
              /srcbucket/000000_0 [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -157,6 +206,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.*
FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/sample7.q.out b/ql/src/test/results/clientpositive/spark/sample7.q.out
index 34c293c..4dd65d2 100644
--- a/ql/src/test/results/clientpositive/spark/sample7.q.out
+++ b/ql/src/test/results/clientpositive/spark/sample7.q.out
@@ -20,10 +20,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -72,6 +75,22 @@ STAGE PLANS:
                      TotalFiles: 1
                      GatherStats: true
                      MultiFileSpray: false
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 166 Data size: 1760 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          null sort order: 
+                          sort order: 
+                          Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col0 (type: struct), _col1 (type: struct)
+                          auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -125,6 +144,36 @@ STAGE PLANS:
                    name: default.srcbucket
            Truncated Path -> Alias:
              /srcbucket/000000_0 [s]
+        Reducer 2
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -158,6 +207,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest1
+          Is Table Level Stats: true
+
PREHOOK: query: INSERT OVERWRITE TABLE dest1 SELECT s.* FROM srcbucket TABLESAMPLE (BUCKET 1 OUT OF 4 on key) s
WHERE s.key > 100
diff --git a/ql/src/test/results/clientpositive/spark/skewjoin.q.out b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
index fdcb4bb..bae6422 100644
--- a/ql/src/test/results/clientpositive/spark/skewjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/skewjoin.q.out
@@ -80,17 +80,19 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
  Stage-1 is a root stage
-  Stage-4 depends on stages: Stage-1 , consists of Stage-5, Stage-0
-  Stage-5
-  Stage-3 depends on stages: Stage-5
+  Stage-5 depends on stages: Stage-1 , consists of Stage-6, Stage-3
+  Stage-6
+  Stage-4 depends on stages: Stage-6
+  Stage-3 depends on stages: Stage-4
  Stage-0 depends on stages: Stage-3
  Stage-2 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -110,7 +112,7 @@ STAGE PLANS:
                      sort order: +
                      Map-reduce partition columns: _col0 (type: string)
                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
-        Map 3
+        Map 4
            Map Operator Tree:
                TableScan
                  alias: src2
@@ -151,15 +153,30 @@ STAGE PLANS:
                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                        name: default.dest_j1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

-  Stage: Stage-4
+  Stage: Stage-5
    Conditional Operator

-  Stage: Stage-5
+  Stage: Stage-6
    Spark
#### A masked pattern was here ####
      Vertices:
-        Map 5
+        Map 7
            Map Operator Tree:
                TableScan
                  Spark HashTable Sink Operator
@@ -169,11 +186,11 @@ STAGE PLANS:
            Local Work:
              Map Reduce Local Work

-  Stage: Stage-3
+  Stage: Stage-4
    Spark
#### A masked pattern was here ####
      Vertices:
-        Map 4
+        Map 6
            Map Operator Tree:
                TableScan
                  Map Join Operator
@@ -195,9 +212,52 @@ STAGE PLANS:
                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                          name: default.dest_j1
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                        File Output Operator
+                          compressed: false
+                          table:
+                              input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                              output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                              serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
            Local Work:
              Map Reduce Local Work

+  Stage: Stage-3
+    Spark
+      Edges:
+        Reducer 3 <- Map 5 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
  Stage: Stage-0
    Move Operator
      tables:
@@ -211,6 +271,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator

+  Stage: Stage-7
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.dest_j1
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out
index 9424361..b50baed 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_11.q.out
@@ -50,12 +50,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -101,7 +103,7 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 16
              bucket_field_name key
              column.name.delimiter ,
@@ -183,6 +185,60 @@ STAGE PLANS:
                      TotalFiles: 16
                      GatherStats: true
                      MultiFileSpray: true
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        keys: '1' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '1' (type: string)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: '1' (type: string)
+                          Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col1 (type: struct), _col2 (type: struct)
+                          auto parallelism: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: '1' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2
+                          columns.types struct:struct:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -216,6 +272,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
+          Is Table Level Stats: false
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds = '1'
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
@@ -1798,7 +1862,7 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 16
              bucket_field_name key
              column.name.delimiter ,
@@ -1871,7 +1935,7 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 16
              bucket_field_name key
              column.name.delimiter ,
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
index d0bb917..3199333 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_12.q.out
@@ -62,12 +62,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -114,7 +116,7 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 16
              bucket_field_name key
              column.name.delimiter ,
@@ -197,6 +199,60 @@ STAGE PLANS:
                      TotalFiles: 16
                      GatherStats: true
                      MultiFileSpray: true
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        keys: '1' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '1' (type: string)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: '1' (type: string)
+                          Statistics: Num rows: 1650 Data size: 17529 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col1 (type: struct), _col2 (type: struct)
+                          auto parallelism: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: '1' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), '1' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 825 Data size: 8764 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2
+                          columns.types struct:struct:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -231,6 +287,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
+          Is Table Level Stats: false
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT /*+ MAPJOIN(b) */ a.key, b.value FROM test_table1 a JOIN test_table2 b ON a.key = b.key AND a.ds = '1' AND b.ds >= '1'
PREHOOK: type: QUERY
PREHOOK: Input: default@test_table1
@@ -278,12 +342,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2

STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1
@@ -330,7 +396,7 @@ STAGE PLANS:
            partition values:
              ds 1
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              bucket_count 16
              bucket_field_name key
              column.name.delimiter ,
@@ -413,6 +479,60 @@ STAGE PLANS:
                      TotalFiles: 16
                      GatherStats: true
                      MultiFileSpray: true
+                    Select Operator
+                      expressions: _col0 (type: int), _col1 (type: string)
+                      outputColumnNames: key, value
+                      Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                        keys: '2' (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: '2' (type: string)
+                          null sort order: a
+                          sort order: +
+                          Map-reduce partition columns: '2' (type: string)
+                          Statistics: Num rows: 3392 Data size: 36194 Basic stats: COMPLETE Column stats: NONE
+                          tag: -1
+                          value expressions: _col1 (type: struct), _col2 (type: struct)
+                          auto parallelism: false
+        Reducer 3
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: '2' (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), '2' (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 0
+#### A masked pattern was here ####
+                    NumFilesPerFileSink: 1
+                    Statistics: Num rows: 1696 Data size: 18097 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2
+                          columns.types struct:struct:string
+                          escape.delim \
+                          hive.serialization.extend.additional.nesting.levels true
+                          serialization.escape.crlf true
+                          serialization.format 1
+                          serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    TotalFiles: 1
+                    GatherStats: false
+                    MultiFileSpray: false

  Stage: Stage-0
    Move Operator
@@ -447,6 +567,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table3
+          Is Table Level Stats: false
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '2')
SELECT /*+mapjoin(b)*/ a.key, concat(a.value, b.value) FROM test_table3 a JOIN test_table1 b ON a.key = b.key AND a.ds = '1' AND b.ds='1'
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
index d8bd846..6197f20 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out
@@ -120,7 +120,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -143,7 +143,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -267,7 +267,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -290,7 +290,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -366,7 +366,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -389,7 +389,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
index d652926..59c5071 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out
@@ -82,7 +82,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
            properties:
-              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+              COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
              SORTBUCKETCOLSPREFIX TRUE
              bucket_count 16
              bucket_field_name key
@@ -105,7 +105,7 @@ STAGE PLANS:
            input format: org.apache.hadoop.mapred.TextInputFormat
            output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -160,7 +160,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -183,7 +183,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -388,7 +388,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -411,7 +411,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -466,7 +466,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -489,7 +489,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -642,7 +642,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -665,7 +665,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -720,7 +720,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -743,7 +743,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -896,7 +896,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -919,7 +919,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -974,7 +974,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key @@ -997,7 +997,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","key2":"true","value":"true"}} SORTBUCKETCOLSPREFIX TRUE bucket_count 16 bucket_field_name key diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out index 6ed3c21..d5427fa 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_18.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -213,12 +221,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -253,6 +263,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: 
default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '2' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -269,6 +314,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' and a.key = 238 PREHOOK: type: QUERY @@ -348,6 +400,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -387,6 +440,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '2') SELECT a.key, a.value FROM test_table2 a WHERE a.ds = '2' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out index fb25015..29793d5 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_19.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out 
b/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out index f35a33d..2bf9c94 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_20.q.out @@ -42,12 +42,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -79,6 +81,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '1' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '1' (type: string) + sort order: + + Map-reduce partition columns: '1' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '1' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -95,6 +132,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1') SELECT a.key, a.value, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -163,6 +207,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -202,6 +247,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, int, string + Table: default.test_table3 + PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1') SELECT a.value, a.key, a.value FROM test_table1 a WHERE a.ds = '1' PREHOOK: type: QUERY @@ -282,12 +334,14 @@ STAGE DEPENDENCIES: Stage-1 is a 
root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -319,6 +373,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value1, value2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16) + keys: '2' (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '2' (type: string) + sort order: + + Map-reduce partition columns: '2' (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + keys: '2' (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '2' (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -335,3 +424,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value1, value2 + Column Types: string, string, string + Table: default.test_table2 + diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out index 1a9118d..2964fc3 100644 --- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out +++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_21.q.out @@ -42,6 +42,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -81,6 +82,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -111,12 +119,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 
<- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -148,6 +158,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -164,6 +209,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -194,12 +246,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -230,6 +284,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By 
Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -246,6 +335,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + PREHOOK: query: drop table test_table2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@test_table2 @@ -276,12 +372,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -313,6 +411,41 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_table2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -329,6 +462,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.test_table2 + 
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table2
@@ -359,12 +499,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -396,6 +538,41 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table2
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                outputColumnNames: key, value, ds
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  keys: ds (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -412,6 +589,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
PREHOOK: query: drop table test_table2
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@test_table2
@@ -442,12 +626,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -478,6 +664,41 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.test_table2
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                outputColumnNames: key, value, ds
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  keys: ds (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -494,3 +715,10 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
index d8bdef2..2cd07fc 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_22.q.out
@@ -40,6 +40,7 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
@@ -77,6 +78,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2
SELECT * FROM test_table1
PREHOOK: type: QUERY
@@ -183,6 +191,7 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
@@ -220,6 +229,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2
SELECT * FROM test_table1
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out
index 7cf3cf7..69f4587 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_6.q.out
@@ -66,12 +66,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -92,7 +94,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col0 (type: int)
                    Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -128,6 +130,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.smb_join_results
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                outputColumnNames: k1, v1, k2, v2
+                Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -142,6 +171,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key
PREHOOK: type: QUERY
@@ -1238,12 +1274,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1264,7 +1302,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col0 (type: int)
                    Statistics: Num rows: 500 Data size: 4812 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -1300,6 +1338,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.smb_join_results
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                outputColumnNames: k1, v1, k2, v2
+                Statistics: Num rows: 550 Data size: 5293 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -1314,6 +1379,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key
PREHOOK: type: QUERY
@@ -2426,12 +2498,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -2452,7 +2526,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col0 (type: int)
                    Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -2488,6 +2562,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.smb_join_results
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                outputColumnNames: k1, v1, k2, v2
+                Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -2502,6 +2603,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(a)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000
PREHOOK: type: QUERY
@@ -2530,12 +2638,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
-        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 3 (PARTITION-LEVEL SORT, 1)
+        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Map 4 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -2556,7 +2666,7 @@ STAGE PLANS:
                    Map-reduce partition columns: _col0 (type: int)
                    Statistics: Num rows: 166 Data size: 1597 Basic stats: COMPLETE Column stats: NONE
                    value expressions: _col1 (type: string)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: b
@@ -2592,6 +2702,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.smb_join_results
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                outputColumnNames: k1, v1, k2, v2
+                Statistics: Num rows: 182 Data size: 1756 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -2606,6 +2743,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(b)*/ * from smb_bucket4_1 a join smb_bucket4_2 b on a.key = b.key where a.key>1000
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
index 622b950..49ccee7 100644
--- a/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
+++ b/ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
@@ -618,10 +618,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -649,6 +652,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.smb_join_results
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+                outputColumnNames: k1, v1, k2, v2
+                Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(k1, 16), compute_stats(v1, 16), compute_stats(k2, 16), compute_stats(v2, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -663,6 +693,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, v1, k2, v2
+          Column Types: int, string, int, string
+          Table: default.smb_join_results
+
PREHOOK: query: insert overwrite table smb_join_results
select /*+mapjoin(a)*/ * from smb_bucket4_1 a full outer join smb_bucket4_2 b on a.key = b.key
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/stats0.q.out b/ql/src/test/results/clientpositive/spark/stats0.q.out
index 6992ffa..6aed71e 100644
--- a/ql/src/test/results/clientpositive/spark/stats0.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats0.q.out
@@ -18,10 +18,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -66,6 +69,22 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+                    auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -117,6 +136,36 @@ STAGE PLANS:
                    name: default.src
            Truncated Path -> Alias:
              /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
  Stage: Stage-0
    Move Operator
@@ -150,6 +199,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_non_partitioned
+          Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table stats_non_partitioned
select * from src
PREHOOK: type: QUERY
@@ -700,10 +757,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -723,6 +783,41 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.stats_partitioned
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
+                outputColumnNames: key, value, ds
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  keys: ds (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -739,6 +834,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_partitioned
+
PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1')
select * from src
PREHOOK: type: QUERY
@@ -1336,10 +1438,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -1384,6 +1489,22 @@ STAGE PLANS:
                    TotalFiles: 1
                    GatherStats: true
                    MultiFileSpray: false
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    null sort order: 
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+                    auto parallelism: false
            Path -> Alias:
#### A masked pattern was here ####
            Path -> Partition:
@@ -1435,6 +1556,36 @@ STAGE PLANS:
                    name: default.src
            Truncated Path -> Alias:
              /src [src]
+        Reducer 2 
+            Needs Tagging: false
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+#### A masked pattern was here ####
+                  NumFilesPerFileSink: 1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types struct:struct
+                        escape.delim \
+                        hive.serialization.extend.additional.nesting.levels true
+                        serialization.escape.crlf true
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  TotalFiles: 1
+                  GatherStats: false
+                  MultiFileSpray: false
  Stage: Stage-0
    Move Operator
@@ -1468,6 +1619,14 @@ STAGE PLANS:
    Stats-Aggr Operator
#### A masked pattern was here ####
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_non_partitioned
+          Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table stats_non_partitioned
select * from src
PREHOOK: type: QUERY
@@ -2018,10 +2177,13 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -2041,6 +2203,41 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.stats_partitioned
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string), '1' (type: string)
+                outputColumnNames: key, value, ds
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  keys: ds (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: struct), _col2 (type: struct)
+        Reducer 2 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -2057,6 +2254,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.stats_partitioned
+
PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1')
select * from src
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/spark/stats1.q.out b/ql/src/test/results/clientpositive/spark/stats1.q.out
index e691f51..94216db 100644
--- a/ql/src/test/results/clientpositive/spark/stats1.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats1.q.out
@@ -24,12 +24,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (GROUP, 1)
+        Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -48,7 +50,7 @@ STAGE PLANS:
                      sort order: 
                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                      value expressions: _col0 (type: bigint)
-        Map 3 
+        Map 4 
            Map Operator Tree:
                TableScan
                  alias: s2
@@ -65,6 +67,19 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
        Reducer 2 
            Reduce Operator Tree:
              Group By Operator
@@ -84,6 +99,33 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.tmptable
+              Select Operator
+                expressions: _col0 (type: string), _col1 (type: string)
+                outputColumnNames: key, value
+                Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL
+                Group By Operator
+                  aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                    value expressions: _col0 (type: struct), _col1 (type: struct)
+        Reducer 3 
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
  Stage: Stage-0
    Move Operator
@@ -98,6 +140,13 @@ STAGE PLANS:
  Stage: Stage-2
    Stats-Aggr Operator
+  Stage: Stage-3
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.tmptable
+
PREHOOK: query: INSERT OVERWRITE TABLE tmptable
SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1
@@ -170,7 +219,7 @@ Retention:          	0
#### A masked pattern was here ####
Table Type:         	MANAGED_TABLE
Table Parameters:
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
	numFiles            	2
	numRows             	26
	rawDataSize         	199
diff --git a/ql/src/test/results/clientpositive/spark/stats10.q.out b/ql/src/test/results/clientpositive/spark/stats10.q.out
index 9c682fc..63159c7 100644
--- a/ql/src/test/results/clientpositive/spark/stats10.q.out
+++ b/ql/src/test/results/clientpositive/spark/stats10.q.out
@@ -18,12 +18,14 @@ STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1
  Stage-2 depends on stages: Stage-0
+  Stage-3 depends on stages: Stage-2
STAGE PLANS:
  Stage: Stage-1
    Spark
      Edges:
        Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
#### A masked pattern was here ####
      Vertices:
        Map 1 
@@ -54,6 +56,41 @@ STAGE PLANS:
                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                      name: default.bucket3_1
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+                outputColumnNames: key, value,
ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -70,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY @@ -423,7 +467,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -462,7 +506,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats14.q.out b/ql/src/test/results/clientpositive/spark/stats14.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/spark/stats14.q.out +++ b/ql/src/test/results/clientpositive/spark/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats15.q.out b/ql/src/test/results/clientpositive/spark/stats15.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/spark/stats15.q.out +++ b/ql/src/test/results/clientpositive/spark/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats18.q.out b/ql/src/test/results/clientpositive/spark/stats18.q.out index 4945808..de8918a 100644 --- a/ql/src/test/results/clientpositive/spark/stats18.q.out +++ b/ql/src/test/results/clientpositive/spark/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out index 359eea3..c537da5 100644 --- a/ql/src/test/results/clientpositive/spark/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/spark/stats_only_null.q.out @@ -73,52 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), 
count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -129,52 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Spark - Edges: - Reducer 2 <- Map 1 (GROUP, 1) -#### A masked pattern was here #### - Vertices: - Map 1 - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reducer 2 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/spark/statsfs.q.out b/ql/src/test/results/clientpositive/spark/statsfs.q.out index d070e9a..16da734 100644 --- a/ql/src/test/results/clientpositive/spark/statsfs.q.out +++ b/ql/src/test/results/clientpositive/spark/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked 
pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out index 3807511..dc71bdd 100644 --- a/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/spark/subquery_multiinsert.q.out @@ -61,6 +61,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-1 depends on stages: Stage-2 Stage-3 depends on stages: Stage-1 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-0 depends on stages: Stage-2 Stage-4 depends on stages: Stage-0 @@ -68,14 +70,15 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 10 (PARTITION-LEVEL SORT, 1), Reducer 9 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Map 7 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) + Reducer 10 <- Map 9 (GROUP, 1) + Reducer 2 <- Map 11 (PARTITION-LEVEL SORT, 1), Reducer 10 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Map 8 (PARTITION-LEVEL SORT, 2), Reducer 2 (PARTITION-LEVEL SORT, 2) Reducer 4 <- Reducer 3 (SORT, 1) - Reducer 5 <- Map 11 (PARTITION-LEVEL SORT, 2), Map 6 (PARTITION-LEVEL SORT, 2) - Reducer 9 <- Map 8 (GROUP, 1) + Reducer 5 <- Map 12 (PARTITION-LEVEL SORT, 2), Map 7 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 10 + Map 11 Map Operator Tree: TableScan alias: b @@ -84,7 +87,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: key (type: string), value (type: string) - Map 11 + Map 12 Map Operator Tree: TableScan alias: b @@ -97,7 +100,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: key (type: string), value (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 + Map 7 Map Operator Tree: TableScan alias: a @@ -119,7 +122,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 7 + Map 8 Map Operator Tree: TableScan alias: s1 @@ -136,7 +139,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE - Map 8 + Map 9 Map Operator Tree: TableScan alias: s1 @@ -155,6 +158,26 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 2 Reduce Operator Tree: Join Operator @@ -207,6 +230,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Reducer 5 Reduce Operator Tree: Join Operator @@ -225,26 +268,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: (_col0 = 0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: 0 (type: bigint) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE 
Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -259,6 +309,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-0 Move Operator tables: @@ -314,6 +378,8 @@ RUN: Stage-1:MOVE RUN: Stage-0:MOVE RUN: Stage-3:STATS RUN: Stage-4:STATS +RUN: Stage-5:COLUMNSTATS +RUN: Stage-6:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -491,9 +557,12 @@ INSERT OVERWRITE TABLE src_5 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-5 is a root stage - Stage-2 depends on stages: Stage-5 + Stage-6 depends on stages: Stage-5 + Stage-2 depends on stages: Stage-6 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-3, Stage-4 + Stage-8 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -501,10 +570,10 @@ STAGE PLANS: Stage: Stage-5 Spark Edges: - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: a @@ -527,7 +596,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) Local Work: Map Reduce Local Work - Map 4 + Map 5 Map Operator Tree: TableScan alias: s1 @@ -545,12 +614,102 @@ STAGE PLANS: 1 _col0 (type: string) Local Work: Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '2') and key is null) (type: boolean) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 7 + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 = 0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: 0 (type: bigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 + 1 + + Stage: Stage-6 + Spark + Edges: + Reducer 7 <- Map 6 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: 
Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > '9') and value is not null) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string), _col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 key (type: string), value (type: string) + 1 _col0 (type: string), _col1 (type: string) + Local Work: + Map Reduce Local Work Map 5 Map Operator Tree: TableScan alias: s1 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Filter Operator + predicate: (key > '2') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Spark HashTable Sink Operator + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Local Work: + Map Reduce Local Work + Map 6 + Map Operator Tree: + TableScan + alias: s1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator predicate: ((key > '2') and key is null) (type: boolean) Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -564,7 +723,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) - Reducer 6 + Reducer 7 Local Work: Map Reduce Local Work Reduce Operator Tree: @@ -591,10 +750,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (SORT, 1) + Reducer 2 <- Map 8 (GROUP, 1) + Reducer 3 <- Map 9 (SORT, 1) #### A masked pattern was here #### Vertices: - Map 1 + Map 8 Map Operator Tree: TableScan alias: b @@ -610,7 +770,7 @@ STAGE PLANS: 1 _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1 input vertices: - 1 Map 3 + 1 Map 4 Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -620,6 +780,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Local Work: + Map Reduce Local Work + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 @@ -628,7 +808,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 6 + 1 
Reducer 7 Statistics: Num rows: 500 Data size: 9812 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: @@ -638,7 +818,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col5 input vertices: - 1 Map 4 + 1 Map 5 Statistics: Num rows: 550 Data size: 10793 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: _col5 is null (type: boolean) @@ -656,6 +836,20 @@ STAGE PLANS: Map Reduce Local Work Reducer 2 Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 3 + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) outputColumnNames: _col0, _col1 @@ -668,6 +862,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -682,6 +896,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-7 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-1 Move Operator tables: @@ -733,11 +961,14 @@ POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:strin POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] RUN: Stage-5:MAPRED +RUN: Stage-6:MAPRED RUN: Stage-2:MAPRED RUN: Stage-0:MOVE RUN: Stage-1:MOVE RUN: Stage-3:STATS RUN: Stage-4:STATS +RUN: Stage-7:COLUMNSTATS +RUN: Stage-8:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -746,17 +977,6 @@ POSTHOOK: query: select * from src_4 POSTHOOK: type: QUERY POSTHOOK: Input: default@src_4 #### A masked pattern was here #### -90 val_90 -90 val_90 -90 val_90 -92 val_92 -95 val_95 -95 val_95 -96 val_96 -97 val_97 -97 
val_97 -98 val_98 -98 val_98 PREHOOK: query: select * from src_5 PREHOOK: type: QUERY PREHOOK: Input: default@src_5 diff --git a/ql/src/test/results/clientpositive/spark/union10.q.out b/ql/src/test/results/clientpositive/spark/union10.q.out index 2b3afec..171595a 100644 --- a/ql/src/test/results/clientpositive/spark/union10.q.out +++ b/ql/src/test/results/clientpositive/spark/union10.q.out @@ -26,14 +26,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +54,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +70,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +109,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +159,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +195,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -167,6 +222,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union12.q.out b/ql/src/test/results/clientpositive/spark/union12.q.out index e9cd26c..2c354e7 100644 --- a/ql/src/test/results/clientpositive/spark/union12.q.out +++ b/ql/src/test/results/clientpositive/spark/union12.q.out @@ -26,14 +26,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -52,7 +54,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -68,7 +70,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 5 + Map 6 Map Operator Tree: TableScan alias: s3 @@ -107,7 +109,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 
1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -130,7 +159,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -153,6 +195,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -167,6 +222,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union17.q.out b/ql/src/test/results/clientpositive/spark/union17.q.out index bcb95e4..6152cde 100644 --- a/ql/src/test/results/clientpositive/spark/union17.q.out +++ b/ql/src/test/results/clientpositive/spark/union17.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,10 +41,12 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 8 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Map 7 (GROUP PARTITION-LEVEL SORT, 2), Reducer 9 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 8 <- Map 1 (GROUP, 1) - Reducer 9 <- Map 1 (GROUP, 1) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 11 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 8 (GROUP PARTITION-LEVEL SORT, 2), 
Reducer 10 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 9 (GROUP PARTITION-LEVEL SORT, 2), Reducer 11 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -61,7 +65,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 6 + Map 8 Map Operator Tree: TableScan alias: s2 @@ -81,7 +85,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Map 7 + Map 9 Map Operator Tree: TableScan alias: s2 @@ -101,6 +105,50 @@ STAGE PLANS: sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 10 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Reducer 11 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(DISTINCT substr(_col1, 5)) + keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Reduce Operator Tree: Group By Operator @@ -121,9 +169,36 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output 
Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 4 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial @@ -141,50 +216,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 8 - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL - Reducer 9 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 6 Reduce Operator Tree: Group By Operator - aggregations: count(VALUE._col0) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(DISTINCT substr(_col1, 5)) - keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 464 
Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) - sort order: +++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -199,6 +257,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union18.q.out b/ql/src/test/results/clientpositive/spark/union18.q.out index dad2050..6f53198 100644 --- a/ql/src/test/results/clientpositive/spark/union18.q.out +++ b/ql/src/test/results/clientpositive/spark/union18.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,7 +41,10 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 6 (GROUP, 1), Reducer 8 (GROUP, 1) + Reducer 4 <- Map 7 (GROUP, 1), Reducer 9 (GROUP, 1) + Reducer 8 <- Map 1 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -58,7 +63,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 6 Map Operator Tree: TableScan alias: s2 @@ -76,6 +81,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -87,7 +105,61 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 7 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 8 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -107,6 +179,19 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL @@ -118,6 +203,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Stage: Stage-0 Move Operator @@ -132,6 +245,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union19.q.out b/ql/src/test/results/clientpositive/spark/union19.q.out index 31795b2..5822ec7 100644 --- a/ql/src/test/results/clientpositive/spark/union19.q.out +++ b/ql/src/test/results/clientpositive/spark/union19.q.out @@ -32,6 +32,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -39,8 +41,11 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 1) - Reducer 3 <- Map 4 (GROUP, 2), Reducer 2 (GROUP, 2) + Reducer 10 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 7 (GROUP, 2), Reducer 9 (GROUP, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Map 8 (GROUP, 1), Reducer 10 (GROUP, 1) + Reducer 9 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -59,7 +64,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 4 + Map 7 Map Operator Tree: TableScan alias: s2 @@ -92,7 +97,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 - Reducer 2 + Map 8 + Map Operator Tree: + TableScan + alias: s2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 10 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -103,30 +134,23 @@ STAGE PLANS: expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: count(_col1) - keys: _col0 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL - value expressions: _col1 (type: bigint) Select Operator expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, val1, val2 Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.dest2 + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator @@ -147,6 +171,82 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort 
order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 9 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 'tst1' (type: string), UDFToString(_col0) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(_col1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col1 (type: bigint) + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 Stage: Stage-0 Move Operator @@ -161,6 +261,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.dest1 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, val1, val2 + Column Types: string, string, string + Table: default.dest2 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union22.q.out b/ql/src/test/results/clientpositive/spark/union22.q.out index 5aa5358..8108ad9 100644 --- a/ql/src/test/results/clientpositive/spark/union22.q.out +++ 
b/ql/src/test/results/clientpositive/spark/union22.q.out @@ -75,13 +75,14 @@ STAGE DEPENDENCIES: Stage-1 depends on stages: Stage-3 Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-3 Spark #### A masked pattern was here #### Vertices: - Map 3 + Map 4 Map Operator Tree: TableScan alias: dst_union22_delta @@ -118,7 +119,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -162,6 +163,8 @@ STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2), Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -208,6 +211,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -219,7 +241,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}} bucket_count -1 column.name.delimiter , columns k0,k1,k2,k3,k4,k5 @@ -260,7 +282,7 @@ STAGE PLANS: name: default.dst_union22_delta Truncated Path -> Alias: /dst_union22_delta/ds=1 [dst_union22_delta] - Map 2 + Map 3 Map Operator Tree: TableScan alias: a @@ -287,7 +309,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col4, _col5 input vertices: - 1 Map 3 + 1 Map 4 Position of Big Table: 0 Statistics: Num rows: 182 Data size: 4062 Basic stats: COMPLETE Column stats: NONE Select Operator @@ -324,6 +346,25 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string) + outputColumnNames: k1, k2, k3, k4, ds + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: a + sort order: + + Map-reduce partition columns: _col0 
(type: string) + Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + auto parallelism: false Local Work: Map Reduce Local Work Path -> Alias: @@ -337,7 +378,7 @@ STAGE PLANS: partition values: ds 1 properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}} bucket_count -1 column.name.delimiter , columns k1,k2,k3,k4 @@ -378,6 +419,41 @@ STAGE PLANS: name: default.dst_union22 Truncated Path -> Alias: /dst_union22/ds=1 [a] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4 + columns.types struct:struct:struct:struct:string + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -410,6 +486,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: k1, k2, k3, k4 + Column Types: string, string, string, string + Table: default.dst_union22 + Is Table Level Stats: false + PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') select * from ( diff --git a/ql/src/test/results/clientpositive/spark/union28.q.out b/ql/src/test/results/clientpositive/spark/union28.q.out index 7ee06fe..478545f 100644 --- a/ql/src/test/results/clientpositive/spark/union28.q.out +++ b/ql/src/test/results/clientpositive/spark/union28.q.out @@ -36,13 +36,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) - Reducer 5 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -66,7 +67,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 + Select Operator + expressions: _col0 (type: 
int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -85,26 +99,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 5 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -123,6 +132,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -137,6 +159,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( select key, value from src diff --git a/ql/src/test/results/clientpositive/spark/union29.q.out b/ql/src/test/results/clientpositive/spark/union29.q.out index 05c44d1..11a48ab 100644 --- 
a/ql/src/test/results/clientpositive/spark/union29.q.out +++ b/ql/src/test/results/clientpositive/spark/union29.q.out @@ -36,10 +36,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -63,48 +66,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Map 3 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -119,6 +107,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( select key, value from src @@ -163,17 +158,17 @@ POSTHOOK: Input: default@union_subq_union 0 val_0 0 val_0 0 val_0 -0 val_0 -0 val_0 -0 val_0 -2 val_2 2 val_2 2 val_2 4 val_4 4 val_4 -4 val_4 5 val_5 5 val_5 5 val_5 5 val_5 5 val_5 +5 val_5 +8 val_8 +8 val_8 +9 val_9 +9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/union30.q.out b/ql/src/test/results/clientpositive/spark/union30.q.out index 12eda1d..3cec27f 100644 --- a/ql/src/test/results/clientpositive/spark/union30.q.out +++ b/ql/src/test/results/clientpositive/spark/union30.q.out @@ -50,13 +50,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP, 2) - Reducer 5 <- Map 2 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Map 1 (GROUP, 1), Reducer 4 (GROUP, 1), Reducer 4 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -80,7 +81,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union - Map 2 + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -99,47 +113,21 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Map 6 - Map Operator Tree: - TableScan - alias: src - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: string), value (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 3 + Reducer 2 Reduce Operator Tree: Group By Operator - keys: KEY._col0 (type: string), KEY._col1 (type: string) + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial 
outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: UDFToInteger(_col0) (type: int), _col1 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.union_subq_union - Reducer 5 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string), KEY._col1 (type: string) @@ -158,6 +146,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_subq_union + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -172,6 +173,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.union_subq_union + PREHOOK: query: insert overwrite table union_subq_union select * from ( @@ -229,14 +237,9 @@ POSTHOOK: Input: default@union_subq_union 0 val_0 0 val_0 0 val_0 -0 val_0 -0 val_0 -0 val_0 2 val_2 2 val_2 2 val_2 -2 val_2 -4 val_4 4 val_4 4 val_4 4 val_4 @@ -244,3 +247,8 @@ POSTHOOK: Input: default@union_subq_union 5 val_5 5 val_5 5 val_5 +5 val_5 +8 val_8 +8 val_8 +8 val_8 +9 val_9 diff --git a/ql/src/test/results/clientpositive/spark/union31.q.out b/ql/src/test/results/clientpositive/spark/union31.q.out index 1dc8db2..d5074e3 100644 --- a/ql/src/test/results/clientpositive/spark/union31.q.out +++ b/ql/src/test/results/clientpositive/spark/union31.q.out @@ -72,6 +72,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -79,26 +81,28 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 2 <- Map 5 (GROUP, 2), Map 7 (GROUP, 2) - Reducer 3 <- Map 6 (GROUP, 2), Map 8 (GROUP, 2) + Reducer 2 <- Map 7 (GROUP, 2), Map 9 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 10 (GROUP, 2), Map 8 (GROUP, 2) + Reducer 5 <- Reducer 4 (GROUP, 1) #### A 
masked pattern was here #### Vertices: - Map 5 + Map 10 Map Operator Tree: TableScan - alias: t1 + alias: t2 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col1 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -108,7 +112,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 6 + Map 7 Map Operator Tree: TableScan alias: t1 @@ -118,12 +122,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col1 (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -133,22 +137,22 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 7 + Map 8 Map Operator Tree: TableScan - alias: t2 + alias: t1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + expressions: _col1 (type: string) + outputColumnNames: _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col0 (type: string) + keys: _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -158,7 +162,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 8 + Map 9 Map Operator Tree: TableScan alias: t2 @@ -168,12 +172,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col1 + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: count(1) - keys: _col1 (type: string) + keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 20 Data size: 140 Basic stats: COMPLETE Column stats: NONE @@ -203,9 +207,36 @@ STAGE PLANS: output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t3 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 3 Reduce Operator Tree: Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Reduce Operator Tree: + Group By Operator aggregations: count(VALUE._col0) keys: KEY._col0 (type: string) mode: mergepartial @@ -223,6 +254,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: value, cnt + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(value, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -237,6 +295,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, cnt + Column Types: string, int + Table: default.t3 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: value, cnt + Column Types: string, int + Table: default.t4 + Stage: Stage-1 Move Operator tables: @@ -354,6 +426,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -362,8 +436,11 @@ STAGE PLANS: Spark 
Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 5 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 5 <- Map 4 (GROUP, 2) + Reducer 4 <- Reducer 8 (GROUP, 1) + Reducer 5 <- Reducer 9 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 2) + Reducer 8 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 9 <- Reducer 2 (GROUP PARTITION-LEVEL SORT, 2), Reducer 7 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -387,7 +464,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -422,7 +499,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 Reduce Operator Tree: Forward Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE @@ -444,6 +563,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t5 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: sum(VALUE._col0) keys: KEY._col0 (type: string) @@ -462,20 +594,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t6 - Reducer 5 + Reducer 9 Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: bigint) + Forward + Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -490,6 +635,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t5 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t6 + Stage: Stage-1 Move Operator tables: @@ -649,6 +808,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -657,7 +818,10 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 2) - Reducer 3 <- Map 4 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 7 (GROUP, 1) + Reducer 5 <- Reducer 8 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 8 <- Map 6 (GROUP PARTITION-LEVEL SORT, 2), Reducer 2 (GROUP PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -681,7 +845,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 6 Map Operator Tree: TableScan alias: t2 @@ -720,7 +884,35 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE - 
Reducer 3 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 Reduce Operator Tree: Forward Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE @@ -742,6 +934,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t7 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Group By Operator aggregations: count(1) keys: KEY._col0 (type: string) @@ -760,6 +965,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 + Reducer 8 + Reduce Operator Tree: + Forward + Statistics: Num rows: 11 Data size: 53 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -774,6 +1006,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr 
Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + Stage: Stage-1 Move Operator tables: diff --git a/ql/src/test/results/clientpositive/spark/union33.q.out b/ql/src/test/results/clientpositive/spark/union33.q.out index def5f69..2058493 100644 --- a/ql/src/test/results/clientpositive/spark/union33.q.out +++ b/ql/src/test/results/clientpositive/spark/union33.q.out @@ -28,13 +28,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 3 <- Map 2 (GROUP PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 3 (GROUP, 2) + Reducer 2 <- Map 1 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 4 <- Map 3 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 5 <- Reducer 4 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -57,7 +59,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src - Map 2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map 3 Map Operator Tree: TableScan alias: src @@ -78,7 +93,21 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 3 + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -92,7 +121,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Reducer 4 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -112,6 +141,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), 
compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -126,6 +168,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, value FROM src @@ -181,6 +230,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -188,6 +238,7 @@ STAGE PLANS: Edges: Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Map 5 (GROUP, 1), Reducer 3 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -211,7 +262,7 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) - Map 4 + Map 5 Map Operator Tree: TableScan alias: src @@ -231,6 +282,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -265,6 +329,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -279,6 +370,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + PREHOOK: query: INSERT OVERWRITE TABLE test_src SELECT key, value FROM ( SELECT key, cast(COUNT(*) as string) AS value FROM src diff --git a/ql/src/test/results/clientpositive/spark/union4.q.out b/ql/src/test/results/clientpositive/spark/union4.q.out index 45705e9..7567315 100644 --- a/ql/src/test/results/clientpositive/spark/union4.q.out +++ b/ql/src/test/results/clientpositive/spark/union4.q.out @@ -22,13 +22,15 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -47,7 +49,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -86,7 +88,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) @@ -109,6 +138,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -123,6 +165,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, count(1) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union6.q.out b/ql/src/test/results/clientpositive/spark/union6.q.out index d419c9a..b189378 100644 --- a/ql/src/test/results/clientpositive/spark/union6.q.out +++ b/ql/src/test/results/clientpositive/spark/union6.q.out @@ -22,12 +22,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 4 (GROUP, 1), Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -46,7 +48,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) - Map 3 + Map 4 Map Operator Tree: TableScan alias: s2 @@ -63,6 +65,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) Reducer 2 Reduce Operator Tree: Group By Operator @@ -82,6 +97,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -96,6 
+138,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: insert overwrite table tmptable select unionsrc.key, unionsrc.value FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1 UNION ALL diff --git a/ql/src/test/results/clientpositive/spark/union_lateralview.q.out b/ql/src/test/results/clientpositive/spark/union_lateralview.q.out index c223ac6..667ca3d 100644 --- a/ql/src/test/results/clientpositive/spark/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/spark/union_lateralview.q.out @@ -48,12 +48,14 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 3 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2), Map 4 (PARTITION-LEVEL SORT, 2), Map 5 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -104,7 +106,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: srcpart @@ -152,7 +154,7 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col2 (type: string) - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -184,6 +186,33 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(arr_ele, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -198,6 +227,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: 
default.test_union_lateral_view + PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value FROM ( diff --git a/ql/src/test/results/clientpositive/spark/union_top_level.q.out b/ql/src/test/results/clientpositive/spark/union_top_level.q.out index 6adf6c4..47da227 100644 --- a/ql/src/test/results/clientpositive/spark/union_top_level.q.out +++ b/ql/src/test/results/clientpositive/spark/union_top_level.q.out @@ -564,14 +564,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -592,9 +594,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -612,9 +613,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -632,7 +632,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: @@ -655,7 +654,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -676,7 +702,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 
Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -697,6 +736,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -711,6 +763,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert into table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all @@ -768,14 +827,16 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark Edges: Reducer 2 <- Map 1 (GROUP, 1) - Reducer 4 <- Map 3 (GROUP, 1) - Reducer 6 <- Map 5 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1), Reducer 5 (GROUP, 1), Reducer 7 (GROUP, 1) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -796,9 +857,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 3 + Map 4 Map Operator Tree: TableScan alias: src @@ -816,9 +876,8 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) - Map 5 + Map 6 Map Operator Tree: TableScan alias: src @@ -836,7 +895,6 @@ STAGE PLANS: Reduce Output Operator sort order: Statistics: Num rows: 3 Data size: 30 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: string) Reducer 2 Reduce Operator Tree: @@ -859,7 +917,34 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 4 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -880,7 +965,20 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reducer 7 Reduce Operator Tree: Select Operator expressions: VALUE._col0 (type: string) @@ -901,6 +999,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.union_top + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 9 Data size: 90 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) Stage: Stage-0 Move Operator @@ -915,6 +1026,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.union_top + PREHOOK: query: insert overwrite table union_top select * from (select key, 0 as value from src where key % 3 == 0 limit 3)a union all diff --git a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out index 943a4b1..a62370a 100644 --- a/ql/src/test/results/clientpositive/spark/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_char_4.q.out @@ -135,10 +135,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +149,56 @@ 
STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), 
compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -192,3 +213,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out index 1c8e479..86978b9 100644 --- a/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_varchar_4.q.out @@ -135,10 +135,13 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -146,38 +149,56 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.varchar_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50)) + 
outputColumnNames: vt, vsi, vi, vb, vf, vd, vs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reducer 2 + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -192,3 +213,10 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: vt, vsi, vi, vb, vf, vd, vs + Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50) + Table: default.varchar_lazy_binary_columnar + diff --git a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index 8b313c0..19750ce 100644 --- a/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ b/ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -173,7 +173,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -194,7 +194,7 @@ STAGE PLANS: input format: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -445,7 +445,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -466,7 +466,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -523,7 +523,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -544,7 +544,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -795,7 +795,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -816,7 +816,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: 
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1005,7 +1005,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1026,7 +1026,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1278,7 +1278,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1299,7 +1299,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1554,7 +1554,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1575,7 +1575,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1832,7 +1832,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1853,7 +1853,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1910,7 +1910,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -1931,7 +1931,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2146,7 +2146,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2167,7 +2167,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2221,7 +2221,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2242,7 +2242,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2470,7 +2470,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2491,7 +2491,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2735,7 +2735,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -2756,7 +2756,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3005,7 +3005,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3026,7 +3026,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3277,7 +3277,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3298,7 +3298,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3622,7 +3622,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3643,7 +3643,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3898,7 +3898,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3919,7 +3919,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3976,7 +3976,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -3997,7 +3997,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4289,7 +4289,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4310,7 +4310,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4554,7 +4554,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4575,7 +4575,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4822,6 +4822,8 @@ STAGE DEPENDENCIES: Stage-2 is a root stage Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3, Stage-4 + Stage-6 depends on stages: Stage-3, Stage-4 Stage-1 depends on stages: Stage-2 Stage-4 depends on stages: Stage-1 @@ -4829,11 +4831,13 @@ STAGE PLANS: Stage: Stage-2 Spark Edges: - Reducer 3 <- Reducer 6 (PARTITION-LEVEL SORT, 2) - Reducer 4 <- Reducer 7 (PARTITION-LEVEL SORT, 2) - Reducer 5 <- Reducer 4 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 1 (PARTITION-LEVEL SORT, 2) - Reducer 7 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 8 (PARTITION-LEVEL SORT, 2) + Reducer 4 <- Reducer 3 (GROUP, 1) + Reducer 5 <- Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 6 <- Reducer 5 (PARTITION-LEVEL SORT, 2) + Reducer 7 <- Reducer 6 (GROUP, 1) + Reducer 8 <- Map 1 (PARTITION-LEVEL SORT, 2) + Reducer 9 <- Map 1 (PARTITION-LEVEL SORT, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -4869,7 +4873,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4890,7 +4894,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns 
p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -4993,11 +4997,62 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: double) + outputColumnNames: p_mfgr, p_name, p_size, r, dr, s + Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(s, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 2888 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + auto parallelism: false Reducer 4 Needs Tagging: false Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types struct:struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 5 + Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true notVectorizedReason: PTF Operator (PTF) not supported vectorized: false Reduce Operator Tree: @@ -5038,7 +5093,7 @@ STAGE PLANS: tag: -1 value expressions: sum_window_0 (type: bigint), _col5 (type: int) auto parallelism: false - Reducer 5 + Reducer 6 Needs Tagging: false Reduce Vectorization: enabled: true @@ -5127,7 +5182,58 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false - Reducer 6 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: int), _col4 (type: int), _col5 (type: int), _col6 (type: double), _col7 (type: int) + outputColumnNames: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 
+ Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(p_mfgr, 16), compute_stats(p_name, 16), compute_stats(p_size, 16), compute_stats(s2, 16), compute_stats(r, 16), compute_stats(dr, 16), compute_stats(cud, 16), compute_stats(fv1, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + auto parallelism: false + Reducer 7 + Needs Tagging: false + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 3864 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5,_col6,_col7 + columns.types struct:struct:struct:struct:struct:struct:struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + Reducer 8 Needs Tagging: false Reduce Vectorization: enabled: true @@ -5160,7 +5266,7 @@ STAGE PLANS: tag: -1 value expressions: _col5 (type: int), _col7 (type: double) auto parallelism: false - Reducer 7 + Reducer 9 Needs Tagging: false Reduce Vectorization: enabled: true @@ -5226,6 +5332,22 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, r, dr, s + Column Types: string, string, int, int, int, double + Table: default.part_4 + Is Table Level Stats: true + + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: p_mfgr, p_name, p_size, s2, r, dr, cud, fv1 + Column Types: string, string, int, int, int, int, double, int + Table: default.part_5 + Is Table Level Stats: true + Stage: Stage-1 Move Operator tables: @@ -5460,7 +5582,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - 
COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5481,7 +5603,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5836,7 +5958,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -5857,7 +5979,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6211,7 +6333,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6232,7 +6354,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6559,7 +6681,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE 
{"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6580,7 +6702,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6953,7 +7075,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -6974,7 +7096,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7313,7 +7435,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment @@ -7334,7 +7456,7 @@ STAGE PLANS: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat properties: - COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"} + COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"p_brand":"true","p_comment":"true","p_container":"true","p_mfgr":"true","p_name":"true","p_partkey":"true","p_retailprice":"true","p_size":"true","p_type":"true"}} bucket_count -1 column.name.delimiter , columns p_partkey,p_name,p_mfgr,p_brand,p_type,p_size,p_container,p_retailprice,p_comment diff --git a/ql/src/test/results/clientpositive/stats0.q.out b/ql/src/test/results/clientpositive/stats0.q.out index 01e00c8..4a9cf36 100644 --- a/ql/src/test/results/clientpositive/stats0.q.out +++ b/ql/src/test/results/clientpositive/stats0.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on 
stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -63,6 +64,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -114,6 +131,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-0 Move Operator @@ -147,6 +193,14 @@ STAGE PLANS: Stats-Aggr Operator #### A masked pattern was here #### + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + PREHOOK: query: insert overwrite table stats_non_partitioned select * from src PREHOOK: type: QUERY @@ -697,6 +751,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -717,6 +772,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -733,6 +822,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + PREHOOK: query: insert overwrite table stats_partitioned partition (ds='1') select * from src PREHOOK: type: QUERY @@ -1332,6 +1428,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -1380,6 +1477,22 @@ STAGE PLANS: TotalFiles: 1 GatherStats: true MultiFileSpray: false + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col0 (type: struct), _col1 (type: struct) + auto parallelism: false Path -> Alias: #### A masked pattern was here #### Path -> Partition: @@ -1431,6 +1544,35 @@ STAGE PLANS: name: default.src Truncated Path -> Alias: /src [src] + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + properties: + columns _col0,_col1 + columns.types struct:struct + escape.delim \ + hive.serialization.extend.additional.nesting.levels true + serialization.escape.crlf true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false Stage: Stage-7 Conditional Operator @@ -1473,6 +1615,14 @@ STAGE PLANS: Stats-Aggr Operator 
#### A masked pattern was here #### + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_non_partitioned + Is Table Level Stats: true + Stage: Stage-3 Map Reduce Map Operator Tree: @@ -2205,6 +2355,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -2228,6 +2379,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.stats_partitioned + Select Operator + expressions: _col0 (type: string), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -2253,6 +2438,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.stats_partitioned + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats1.q.out b/ql/src/test/results/clientpositive/stats1.q.out index 2d5b4f8..e56f8d3 100644 --- a/ql/src/test/results/clientpositive/stats1.q.out +++ b/ql/src/test/results/clientpositive/stats1.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-5 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-1 @@ -75,6 +76,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: 
_col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -92,6 +106,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -106,6 +146,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-5 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + PREHOOK: query: INSERT OVERWRITE TABLE tmptable SELECT unionsrc.key, unionsrc.value FROM (SELECT 'tst1' AS key, cast(count(1) AS string) AS value FROM src s1 @@ -178,7 +225,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 26 rawDataSize 199 diff --git a/ql/src/test/results/clientpositive/stats10.q.out b/ql/src/test/results/clientpositive/stats10.q.out index d1fe473..9e95d04 100644 --- a/ql/src/test/results/clientpositive/stats10.q.out +++ b/ql/src/test/results/clientpositive/stats10.q.out @@ -18,6 +18,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -48,6 +50,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.bucket3_1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string) + outputColumnNames: key, value, ds + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num 
rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -64,6 +82,42 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: int, string + Table: default.bucket3_1 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1') select * from src PREHOOK: type: QUERY @@ -414,7 +468,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 @@ -453,7 +507,7 @@ Database: default Table: bucket3_1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 2 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats14.q.out b/ql/src/test/results/clientpositive/stats14.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/stats14.q.out +++ b/ql/src/test/results/clientpositive/stats14.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE 
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats15.q.out b/ql/src/test/results/clientpositive/stats15.q.out index 8501746..1cd660c 100644 --- a/ql/src/test/results/clientpositive/stats15.q.out +++ b/ql/src/test/results/clientpositive/stats15.q.out @@ -42,7 +42,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -186,7 +186,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -226,7 +226,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats18.q.out b/ql/src/test/results/clientpositive/stats18.q.out index 4945808..de8918a 100644 --- a/ql/src/test/results/clientpositive/stats18.q.out +++ b/ql/src/test/results/clientpositive/stats18.q.out @@ -39,7 +39,7 @@ Database: default Table: stats_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats4.q.out b/ql/src/test/results/clientpositive/stats4.q.out index 8f503a9..b4f04e1 100644 --- a/ql/src/test/results/clientpositive/stats4.q.out +++ b/ql/src/test/results/clientpositive/stats4.q.out @@ -48,16 +48,14 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3, Stage-9, Stage-10 + Stage-12 depends on stages: Stage-3, Stage-9, Stage-10 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 - Stage-14 depends on stages: Stage-2 , consists of Stage-11, Stage-10, Stage-12 - Stage-11 - Stage-1 depends on stages: Stage-11, Stage-10, Stage-13 + Stage-1 depends on stages: Stage-2 Stage-9 depends on stages: Stage-1 - Stage-10 - Stage-12 - Stage-13 depends on stages: Stage-12 + Stage-10 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-2 @@ -81,6 +79,22 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) + outputColumnNames: key, value, ds, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: ds (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 
(type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) Filter Operator predicate: (ds > '2008-04-08') (type: boolean) Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE @@ -96,6 +110,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.nzhang_part2 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + outputColumnNames: key, value, hr + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + keys: '2008-12-31' (type: string), hr (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -122,6 +170,20 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part1 + + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.nzhang_part2 + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -152,15 +214,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-14 - Conditional Operator - - Stage: Stage-11 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -181,31 +234,30 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan + Reduce Output Operator + key expressions: '2008-12-31' (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: '2008-12-31' (type: string), _col1 (type: string) + Statistics: Num rows: 666 Data size: 7075 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: struct), _col3 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + keys: '2008-12-31' (type: 
string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col2 (type: struct), _col3 (type: struct), '2008-12-31' (type: string), _col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false + Statistics: Num rows: 333 Data size: 3537 Basic stats: COMPLETE Column stats: NONE table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-12 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.nzhang_part2 - - Stage: Stage-13 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### PREHOOK: query: from srcpart insert overwrite table nzhang_part1 partition (ds, hr) select key, value, ds, hr where ds <= '2008-04-08' @@ -2305,7 +2357,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2345,7 +2397,7 @@ Database: default Table: nzhang_part1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2385,7 +2437,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -2425,7 +2477,7 @@ Database: default Table: nzhang_part2 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out index b855b38..3cafd54 100644 --- a/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out +++ b/ql/src/test/results/clientpositive/stats_empty_dyn_part.q.out @@ -16,6 +16,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -42,6 +43,40 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 
(type: string) + outputColumnNames: key, part + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16) + keys: part (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: struct), _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -67,6 +102,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: string + Table: default.tmptable + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/stats_list_bucket.q.out b/ql/src/test/results/clientpositive/stats_list_bucket.q.out index 0c43b1b..e7b7b26 100644 --- a/ql/src/test/results/clientpositive/stats_list_bucket.q.out +++ b/ql/src/test/results/clientpositive/stats_list_bucket.q.out @@ -61,7 +61,7 @@ Database: default Table: stats_list_bucket #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 @@ -133,7 +133,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c1\":\"true\",\"c2\":\"true\"}} numFiles 4 numRows 500 rawDataSize 4812 diff --git a/ql/src/test/results/clientpositive/stats_only_null.q.out b/ql/src/test/results/clientpositive/stats_only_null.q.out index 88c2114..c537da5 100644 --- a/ql/src/test/results/clientpositive/stats_only_null.q.out +++ b/ql/src/test/results/clientpositive/stats_only_null.q.out @@ -73,46 +73,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: 
count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink @@ -123,46 +89,12 @@ POSTHOOK: query: explain select count(*), count(a), count(b), count(c), count(d) from stats_null_part POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-1 is a root stage - Stage-0 depends on stages: Stage-1 + Stage-0 is a root stage STAGE PLANS: - Stage: Stage-1 - Map Reduce - Map Operator Tree: - TableScan - alias: stats_null_part - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: a (type: double), b (type: int), c (type: string), d (type: smallint) - outputColumnNames: a, b, c, d - Statistics: Num rows: 10 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count(), count(a), count(b), count(c), count(d) - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint) - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-0 Fetch Operator - limit: -1 + limit: 1 Processor Tree: ListSink diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out index c779741..f9fe2f9 100644 --- a/ql/src/test/results/clientpositive/stats_partial_size.q.out +++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out @@ -47,16 +47,16 @@ STAGE PLANS: Map Operator Tree: TableScan alias: sample_partitioned - Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 
Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: x (type: int), y (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) TableScan alias: sample @@ -81,10 +81,10 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 4 Data size: 48 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/statsfs.q.out b/ql/src/test/results/clientpositive/statsfs.q.out index d070e9a..16da734 100644 --- a/ql/src/test/results/clientpositive/statsfs.q.out +++ b/ql/src/test/results/clientpositive/statsfs.q.out @@ -176,7 +176,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -215,7 +215,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -343,7 +343,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -426,7 +426,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 @@ -466,7 +466,7 @@ Database: default Table: t1 #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 500 rawDataSize 5312 diff --git a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out index b8923d8..de11c6b 100644 --- a/ql/src/test/results/clientpositive/subquery_multiinsert.q.out +++ b/ql/src/test/results/clientpositive/subquery_multiinsert.q.out @@ -58,18 +58,21 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-2 depends on stages: Stage-10 + Stage-11 is a root stage + 
Stage-2 depends on stages: Stage-11 Stage-3 depends on stages: Stage-2 Stage-4 depends on stages: Stage-3 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 + Stage-12 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-13 depends on stages: Stage-5, Stage-7, Stage-8 Stage-6 depends on stages: Stage-2 Stage-0 depends on stages: Stage-6 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-6 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -221,6 +224,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -235,6 +258,20 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator + Stage: Stage-12 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-13 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + Stage: Stage-6 Map Reduce Map Operator Tree: @@ -281,6 +318,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -295,6 +347,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + 
Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -332,15 +406,18 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED +RUN: Stage-11:MAPRED RUN: Stage-2:MAPRED RUN: Stage-3:MAPRED RUN: Stage-6:MAPRED RUN: Stage-4:MAPRED RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-1:MOVE RUN: Stage-7:STATS RUN: Stage-5:STATS +RUN: Stage-12:COLUMNSTATS +RUN: Stage-13:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 @@ -487,7 +564,7 @@ POSTHOOK: Input: default@src_5 199 val_199 199 val_199 2 val_2 -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: explain from src b @@ -518,22 +595,25 @@ INSERT OVERWRITE TABLE src_5 order by key POSTHOOK: type: QUERY STAGE DEPENDENCIES: - Stage-10 is a root stage - Stage-14 depends on stages: Stage-10 , consists of Stage-17, Stage-2 - Stage-17 has a backup stage: Stage-2 - Stage-13 depends on stages: Stage-17 - Stage-15 depends on stages: Stage-2, Stage-13 - Stage-4 depends on stages: Stage-15 + Stage-11 is a root stage + Stage-15 depends on stages: Stage-11 , consists of Stage-18, Stage-2 + Stage-18 has a backup stage: Stage-2 + Stage-14 depends on stages: Stage-18 + Stage-16 depends on stages: Stage-2, Stage-14 + Stage-4 depends on stages: Stage-16 Stage-1 depends on stages: Stage-4 Stage-5 depends on stages: Stage-1 - Stage-16 depends on stages: Stage-2, Stage-13 - Stage-12 depends on stages: Stage-16 - Stage-0 depends on stages: Stage-12 + Stage-19 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-20 depends on stages: Stage-5, Stage-7, Stage-8 + Stage-17 depends on stages: Stage-2, Stage-14 + Stage-13 depends on stages: Stage-17 + Stage-0 depends on stages: Stage-13 Stage-7 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-13 Stage-2 STAGE PLANS: - Stage: Stage-10 + Stage: Stage-11 Map Reduce Map Operator Tree: TableScan @@ -576,10 +656,10 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-14 + Stage: Stage-15 Conditional Operator - Stage: Stage-17 + Stage: Stage-18 Map Reduce Local Work Alias -> Map Local Tables: $INTNAME @@ -593,7 +673,7 @@ STAGE PLANS: 0 1 - Stage: Stage-13 + Stage: Stage-14 Map Reduce Map Operator Tree: TableScan @@ -625,7 +705,7 @@ STAGE PLANS: Local Work: Map Reduce Local Work - Stage: Stage-15 + Stage: Stage-16 Map Reduce Local Work Alias -> Map Local Tables: sq_2:s1 @@ -687,6 +767,26 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_5 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 275 Data size: 5396 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-1 Move Operator @@ -701,7 +801,21 @@ STAGE PLANS: Stage: Stage-5 Stats-Aggr Operator - Stage: Stage-16 + Stage: Stage-19 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_4 + + Stage: Stage-20 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.src_5 + + Stage: Stage-17 Map Reduce Local Work Alias -> Map Local Tables: sq_1:a @@ -729,7 +843,7 @@ STAGE PLANS: 0 key (type: string), value (type: string) 1 _col0 (type: string), _col1 (type: string) - Stage: Stage-12 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan @@ -749,6 +863,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.src_4 + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Local Work: Map Reduce Local Work @@ -765,6 +894,28 @@ STAGE PLANS: Stage: Stage-7 Stats-Aggr Operator + Stage: Stage-8 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-2 Map Reduce Map 
Operator Tree: @@ -804,7 +955,7 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe -Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-13:MAPRED' is a cross product +Warning: Map Join MAPJOIN[56][bigTable=b] in task 'Stage-14:MAPRED' is a cross product Warning: Shuffle Join JOIN[31][tables = [b, sq_2_notin_nullcheck]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: from src b INSERT OVERWRITE TABLE src_4 @@ -842,18 +993,21 @@ POSTHOOK: Lineage: src_4.key SIMPLE [(src)b.FieldSchema(name:key, type:string, c POSTHOOK: Lineage: src_4.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: src_5.key SIMPLE [(src)b.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: src_5.value SIMPLE [(src)b.FieldSchema(name:value, type:string, comment:default), ] -RUN: Stage-10:MAPRED -RUN: Stage-14:CONDITIONAL -RUN: Stage-17:MAPREDLOCAL -RUN: Stage-13:MAPRED -RUN: Stage-15:MAPREDLOCAL +RUN: Stage-11:MAPRED +RUN: Stage-15:CONDITIONAL +RUN: Stage-18:MAPREDLOCAL +RUN: Stage-14:MAPRED RUN: Stage-16:MAPREDLOCAL +RUN: Stage-17:MAPREDLOCAL RUN: Stage-4:MAPRED -RUN: Stage-12:MAPRED +RUN: Stage-13:MAPRED RUN: Stage-1:MOVE RUN: Stage-0:MOVE +RUN: Stage-8:MAPRED RUN: Stage-5:STATS RUN: Stage-7:STATS +RUN: Stage-19:COLUMNSTATS +RUN: Stage-20:COLUMNSTATS PREHOOK: query: select * from src_4 PREHOOK: type: QUERY PREHOOK: Input: default@src_4 diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out index 6602222..0675ed5 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_1.q.out @@ -120,21 +120,34 @@ POSTHOOK: query: explain analyze insert overwrite table t select key from src POSTHOOK: type: QUERY Plan optimized by CBO. 
-Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.t"} - Stage-2 - Dependency Collection{} - Stage-1 - Map 1 - File Output Operator [FS_2] - table:{"name:":"default.t"} - Select Operator [SEL_1] (rows=500/500 width=87) - Output:["_col0"] - TableScan [TS_0] (rows=500/500 width=87) - default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-4 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.t"} + Stage-2 + Dependency Collection{} + Stage-1 + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=480) + Output:["_col0"],aggregations:["compute_stats(VALUE._col0, 16)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_2] + table:{"name:":"default.t"} + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["_col0"] + TableScan [TS_0] (rows=500/500 width=87) + default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=500/500 width=87) + Output:["key"] + Please refer to the previous Select Operator [SEL_1] PREHOOK: query: select key from src limit 10 PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out index ea572cd..0642942 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_2.q.out @@ -649,20 +649,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=242/242 width=18) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242/242 width=18) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -691,34 +691,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 
width=18) + Filter Operator [FIL_23] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=18) + Filter Operator [FIL_22] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_24] (rows=242/242 width=18) + Filter Operator [FIL_24] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key PREHOOK: type: QUERY @@ -748,20 +748,20 @@ Stage-0 Stage-1 Map 1 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_15] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_15] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_14] (rows=242/242 width=18) + Filter Operator [FIL_14] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242/242 width=18) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] PREHOOK: query: select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value PREHOOK: type: QUERY @@ -794,34 +794,34 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_16] - Merge Join Operator [MERGEJOIN_27] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_27] (rows=1080/1166 width=95) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0","_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_25] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_25] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_23] (rows=242/242 width=18) + Filter Operator [FIL_23] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - 
default@tab2,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab2,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_22] (rows=242/242 width=18) + Filter Operator [FIL_22] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_24] (rows=242/242 width=18) + Filter Operator [FIL_24] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab2,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab2,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key UNION ALL @@ -873,43 +873,43 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 3 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_22] - Merge Join Operator [MERGEJOIN_38] (rows=558/1646 width=18) + Merge Join Operator [MERGEJOIN_38] (rows=1892/1646 width=8) Conds:Union 2._col0=RS_19._col0(Inner) <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_19] PartitionCols:_col0 - Select Operator [SEL_17] (rows=500/500 width=18) + Select Operator [SEL_17] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_35] (rows=500/500 width=18) + Filter Operator [FIL_35] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_15] (rows=500/500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_15] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 2 [SIMPLE_EDGE] <-Map 1 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_36] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_36] (rows=500/480 width=4) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_33] (rows=242/242 width=18) + Filter Operator [FIL_33] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_32] (rows=242/242 width=18) + Filter Operator [FIL_32] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) + TableScan [TS_0] (rows=242/242 width=4) Output:["key"] <-Map 6 [CONTAINS] Reduce Output Operator [RS_18] PartitionCols:_col0 - Select Operator [SEL_12] (rows=242/242 width=18) + Select Operator [SEL_12] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_34] (rows=242/242 width=18) + Filter Operator [FIL_34] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_10] (rows=242/242 width=18) + TableScan [TS_10] (rows=242/242 
width=4) Output:["key"] PREHOOK: query: select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value @@ -962,58 +962,58 @@ Stage-0 Output:["_col0"],aggregations:["count()"] <-Reducer 4 [CUSTOM_SIMPLE_EDGE] PARTITION_ONLY_SHUFFLE [RS_28] - Merge Join Operator [MERGEJOIN_50] (rows=587/3768 width=18) + Merge Join Operator [MERGEJOIN_50] (rows=3372/3768 width=8) Conds:Union 3._col0=RS_25._col0(Inner) <-Map 9 [SIMPLE_EDGE] SHUFFLE [RS_25] PartitionCols:_col0 - Select Operator [SEL_23] (rows=500/500 width=18) + Select Operator [SEL_23] (rows=500/500 width=4) Output:["_col0"] - Filter Operator [FIL_46] (rows=500/500 width=18) + Filter Operator [FIL_46] (rows=500/500 width=4) predicate:key is not null - TableScan [TS_21] (rows=500/500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key"] + TableScan [TS_21] (rows=500/500 width=4) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] <-Union 3 [SIMPLE_EDGE] <-Map 8 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Select Operator [SEL_18] (rows=242/242 width=18) + Select Operator [SEL_18] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_45] (rows=242/242 width=18) + Filter Operator [FIL_45] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_16] (rows=242/242 width=18) + TableScan [TS_16] (rows=242/242 width=4) Output:["key"] <-Reducer 2 [CONTAINS] Reduce Output Operator [RS_24] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_49] (rows=292/1166 width=18) + Merge Join Operator [MERGEJOIN_49] (rows=1080/1166 width=4) Conds:RS_12._col1=RS_13._col1(Inner),Output:["_col0"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_12] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_47] (rows=266/480 width=18) + Merge Join Operator [MERGEJOIN_47] (rows=500/480 width=95) Conds:SEL_2._col0=SEL_5._col0(Inner),Output:["_col0","_col1"] - <-Select Operator [SEL_5] (rows=242/242 width=18) + <-Select Operator [SEL_5] (rows=242/242 width=4) Output:["_col0"] - Filter Operator [FIL_43] (rows=242/242 width=18) + Filter Operator [FIL_43] (rows=242/242 width=4) predicate:key is not null - TableScan [TS_3] (rows=242/242 width=18) - default@tab,s3,Tbl:COMPLETE,Col:NONE,Output:["key"] - <-Select Operator [SEL_2] (rows=242/242 width=18) + TableScan [TS_3] (rows=242/242 width=4) + default@tab,s3,Tbl:COMPLETE,Col:COMPLETE,Output:["key"] + <-Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_42] (rows=242/242 width=18) + Filter Operator [FIL_42] (rows=242/242 width=95) predicate:(key is not null and value is not null) - TableScan [TS_0] (rows=242/242 width=18) - default@tab,s1,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_0] (rows=242/242 width=95) + default@tab,s1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] <-Map 7 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col1 - Select Operator [SEL_8] (rows=242/242 width=18) + Select Operator [SEL_8] (rows=242/242 width=91) Output:["_col1"] - Filter Operator [FIL_44] (rows=242/242 width=18) + Filter Operator [FIL_44] (rows=242/242 width=91) predicate:value is not null - TableScan [TS_6] (rows=242/242 width=18) - default@tab,s2,Tbl:COMPLETE,Col:NONE,Output:["value"] + TableScan [TS_6] (rows=242/242 width=91) + default@tab,s2,Tbl:COMPLETE,Col:COMPLETE,Output:["value"] PREHOOK: query: CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1127,234 +1127,299 @@ Plan optimized by CBO. 
Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 13 <- Union 10 (CONTAINS) -Map 14 <- Union 10 (CONTAINS) -Map 21 <- Union 22 (CONTAINS) -Map 23 <- Union 22 (CONTAINS) -Map 24 <- Union 22 (CONTAINS) -Map 25 <- Union 22 (CONTAINS) -Map 6 <- Union 2 (CONTAINS) -Map 9 <- Union 10 (CONTAINS) -Reducer 11 <- Map 15 (SIMPLE_EDGE), Union 10 (SIMPLE_EDGE) -Reducer 12 <- Map 16 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 18 <- Map 17 (SIMPLE_EDGE), Map 20 (SIMPLE_EDGE) -Reducer 19 <- Reducer 18 (SIMPLE_EDGE), Union 22 (SIMPLE_EDGE), Union 5 (CONTAINS) -Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) -Reducer 4 <- Map 8 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Map 12 <- Union 13 (CONTAINS) +Map 16 <- Union 13 (CONTAINS) +Map 17 <- Union 13 (CONTAINS) +Map 24 <- Union 25 (CONTAINS) +Map 26 <- Union 25 (CONTAINS) +Map 27 <- Union 25 (CONTAINS) +Map 28 <- Union 25 (CONTAINS) +Map 9 <- Union 2 (CONTAINS) +Reducer 14 <- Map 18 (SIMPLE_EDGE), Union 13 (SIMPLE_EDGE) +Reducer 15 <- Map 19 (SIMPLE_EDGE), Reducer 14 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 21 <- Map 20 (SIMPLE_EDGE), Map 23 (SIMPLE_EDGE) +Reducer 22 <- Reducer 21 (SIMPLE_EDGE), Union 25 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 3 <- Map 10 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) +Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE), Union 5 (CONTAINS) +Reducer 6 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Union 5 (CUSTOM_SIMPLE_EDGE) +Reducer 8 <- Union 5 (CUSTOM_SIMPLE_EDGE) -Stage-5 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.a"} - Stage-4 - Dependency Collection{} - Stage-3 - Union 5 - <-Reducer 12 [CONTAINS] - File Output Operator [FS_79] - table:{"name:":"default.a"} - Select Operator [SEL_45] (rows=5839/5421 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_126] (rows=5839/5421 width=178) - Conds:RS_42._col1=RS_43._col0(Inner),Output:["_col1","_col4"] - <-Map 16 [SIMPLE_EDGE] - SHUFFLE [RS_43] - PartitionCols:_col0 - Select Operator [SEL_38] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_116] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_36] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 11 [SIMPLE_EDGE] - SHUFFLE [RS_42] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_125] (rows=2394/2097 width=87) - Conds:Union 10._col0=RS_40._col1(Inner),Output:["_col1"] - <-Map 15 [SIMPLE_EDGE] - SHUFFLE [RS_40] - PartitionCols:_col1 - Select Operator [SEL_35] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_115] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_33] (rows=500/500 width=178) - default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 10 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_39] - PartitionCols:_col0 - Select Operator [SEL_26] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_113] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_24] (rows=500/500 width=91) - Output:["value"] - <-Map 14 [CONTAINS] - Reduce Output Operator [RS_39] - PartitionCols:_col0 - Select Operator [SEL_31] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_114] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_29] (rows=500/500 width=91) - Output:["value"] - <-Map 9 [CONTAINS] - Reduce Output Operator [RS_39] - PartitionCols:_col0 - Select Operator 
[SEL_23] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_112] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_21] (rows=25/25 width=89) - Output:["value"] - File Output Operator [FS_81] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_45] - File Output Operator [FS_83] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_45] - <-Reducer 19 [CONTAINS] - File Output Operator [FS_79] - table:{"name:":"default.a"} - Select Operator [SEL_76] (rows=313/820 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_128] (rows=313/820 width=175) - Conds:RS_73._col1=Union 22._col0(Inner),Output:["_col0","_col3"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_73] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_127] (rows=44/115 width=264) - Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 17 [SIMPLE_EDGE] - SHUFFLE [RS_70] - PartitionCols:_col0 - Select Operator [SEL_50] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_117] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_48] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 20 [SIMPLE_EDGE] - SHUFFLE [RS_71] - PartitionCols:_col0 - Select Operator [SEL_53] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_118] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_51] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 22 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_74] - PartitionCols:_col0 - Select Operator [SEL_56] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_119] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_54] (rows=25/25 width=89) - Output:["value"] - <-Map 23 [CONTAINS] - Reduce Output Operator [RS_74] - PartitionCols:_col0 - Select Operator [SEL_59] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_120] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_57] (rows=500/500 width=91) - Output:["value"] - <-Map 24 [CONTAINS] - Reduce Output Operator [RS_74] - PartitionCols:_col0 - Select Operator [SEL_64] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_121] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_62] (rows=500/500 width=91) - Output:["value"] - <-Map 25 [CONTAINS] - Reduce Output Operator [RS_74] - PartitionCols:_col0 - Select Operator [SEL_68] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_122] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_66] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_81] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_76] - File Output Operator [FS_83] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_76] - <-Reducer 4 [CONTAINS] - File Output Operator [FS_79] - table:{"name:":"default.a"} - Select Operator [SEL_20] (rows=148/170 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_124] (rows=148/170 width=177) - Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] - <-Map 8 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col0 - Select Operator [SEL_13] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_111] (rows=500/500 width=178) - predicate:key is not null - 
TableScan [TS_11] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_17] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_123] (rows=61/108 width=86) - Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] - <-Map 7 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col1 - Select Operator [SEL_10] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_110] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_8] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_2] (rows=25/25 width=89) - Output:["_col0"] - Filter Operator [FIL_108] (rows=25/25 width=89) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=89) - Output:["value"] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=500/500 width=91) - Output:["_col0"] - Filter Operator [FIL_109] (rows=500/500 width=91) - predicate:value is not null - TableScan [TS_3] (rows=500/500 width=91) - Output:["value"] - File Output Operator [FS_81] - table:{"name:":"default.b"} - Please refer to the previous Select Operator [SEL_20] - File Output Operator [FS_83] - table:{"name:":"default.c"} - Please refer to the previous Select Operator [SEL_20] -Stage-6 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.b"} - Please refer to the previous Stage-4 -Stage-7 - Stats-Aggr Operator - Stage-2 - Move Operator - table:{"name:":"default.c"} - Please refer to the previous Stage-4 +Stage-8 + Column Stats Work{} + Stage-5 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.a"} + Stage-4 + Dependency Collection{} + Stage-3 + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Union 5 [CUSTOM_SIMPLE_EDGE] + <-Reducer 15 [CONTAINS] + File Output Operator [FS_79] + table:{"name:":"default.a"} + Select Operator [SEL_45] (rows=5839/5421 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_126] (rows=5839/5421 width=178) + Conds:RS_42._col1=RS_43._col0(Inner),Output:["_col1","_col4"] + <-Map 19 [SIMPLE_EDGE] + SHUFFLE [RS_43] + PartitionCols:_col0 + Select Operator [SEL_38] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_116] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_36] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 14 [SIMPLE_EDGE] + SHUFFLE [RS_42] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_125] (rows=2394/2097 width=87) + Conds:Union 13._col0=RS_40._col1(Inner),Output:["_col1"] + <-Map 18 [SIMPLE_EDGE] + SHUFFLE [RS_40] + PartitionCols:_col1 + Select Operator [SEL_35] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_115] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_33] (rows=500/500 width=178) + default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 13 [SIMPLE_EDGE] + <-Map 12 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_23] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_112] (rows=25/25 width=89) + predicate:value is 
not null + TableScan [TS_21] (rows=25/25 width=89) + Output:["value"] + <-Map 16 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_26] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_113] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_24] (rows=500/500 width=91) + Output:["value"] + <-Map 17 [CONTAINS] + Reduce Output Operator [RS_39] + PartitionCols:_col0 + Select Operator [SEL_31] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_114] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_29] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_45] + File Output Operator [FS_81] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_45] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_45] + File Output Operator [FS_83] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_45] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_45] + <-Reducer 22 [CONTAINS] + File Output Operator [FS_79] + table:{"name:":"default.a"} + Select Operator [SEL_76] (rows=313/820 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_128] (rows=313/820 width=175) + Conds:RS_73._col1=Union 25._col0(Inner),Output:["_col0","_col3"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_73] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_127] (rows=44/115 width=264) + Conds:RS_70._col0=RS_71._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 20 [SIMPLE_EDGE] + SHUFFLE [RS_70] + PartitionCols:_col0 + Select Operator [SEL_50] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_117] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_48] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 23 [SIMPLE_EDGE] + SHUFFLE [RS_71] + PartitionCols:_col0 + Select Operator [SEL_53] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_118] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_51] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 25 [SIMPLE_EDGE] + <-Map 24 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_56] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_119] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_54] (rows=25/25 width=89) + Output:["value"] + <-Map 26 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_59] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_120] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_57] (rows=500/500 width=91) + Output:["value"] + <-Map 27 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select Operator [SEL_64] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_121] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_62] (rows=500/500 width=91) + Output:["value"] + <-Map 28 [CONTAINS] + Reduce Output Operator [RS_74] + PartitionCols:_col0 + Select 
Operator [SEL_68] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_122] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_66] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_76] + File Output Operator [FS_81] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_76] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_76] + File Output Operator [FS_83] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_76] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_76] + <-Reducer 4 [CONTAINS] + File Output Operator [FS_79] + table:{"name:":"default.a"} + Select Operator [SEL_20] (rows=148/170 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_124] (rows=148/170 width=177) + Conds:RS_17._col1=RS_18._col0(Inner),Output:["_col1","_col4"] + <-Map 11 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col0 + Select Operator [SEL_13] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_111] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_11] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_17] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_123] (rows=61/108 width=86) + Conds:Union 2._col0=RS_15._col1(Inner),Output:["_col1"] + <-Map 10 [SIMPLE_EDGE] + SHUFFLE [RS_15] + PartitionCols:_col1 + Select Operator [SEL_10] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_110] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_8] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_2] (rows=25/25 width=89) + Output:["_col0"] + Filter Operator [FIL_108] (rows=25/25 width=89) + predicate:value is not null + TableScan [TS_0] (rows=25/25 width=89) + Output:["value"] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=500/500 width=91) + Output:["_col0"] + Filter Operator [FIL_109] (rows=500/500 width=91) + predicate:value is not null + TableScan [TS_3] (rows=500/500 width=91) + Output:["value"] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_81] + table:{"name:":"default.b"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + File Output Operator [FS_83] + table:{"name:":"default.c"} + Please refer to the previous Select Operator [SEL_20] + Reduce Output Operator [RS_2] + Select Operator [SEL_1] (rows=6300/6411 width=178) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_20] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + 
Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] + Reducer 8 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Union 5 [CUSTOM_SIMPLE_EDGE] + Stage-6 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.b"} + Please refer to the previous Stage-4 + Stage-7 + Stats-Aggr Operator + Stage-2 + Move Operator + table:{"name:":"default.c"} + Please refer to the previous Stage-4 +Stage-9 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 +Stage-10 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 PREHOOK: query: FROM ( @@ -1432,286 +1497,327 @@ Plan optimized by CBO. Vertex dependency in root stage Map 1 <- Union 2 (CONTAINS) -Map 10 <- Union 2 (CONTAINS) -Map 13 <- Union 14 (CONTAINS) -Map 20 <- Union 14 (CONTAINS) -Map 21 <- Union 16 (CONTAINS) -Map 28 <- Union 29 (CONTAINS) -Map 35 <- Union 29 (CONTAINS) -Map 36 <- Union 31 (CONTAINS) -Map 37 <- Union 33 (CONTAINS) -Reducer 15 <- Union 14 (SIMPLE_EDGE), Union 16 (CONTAINS) -Reducer 17 <- Union 16 (SIMPLE_EDGE) -Reducer 18 <- Map 22 (SIMPLE_EDGE), Reducer 17 (SIMPLE_EDGE) -Reducer 19 <- Map 23 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE), Union 6 (CONTAINS) -Reducer 25 <- Map 24 (SIMPLE_EDGE), Map 27 (SIMPLE_EDGE) -Reducer 26 <- Reducer 25 (SIMPLE_EDGE), Reducer 34 (SIMPLE_EDGE), Union 8 (CONTAINS) +Map 13 <- Union 2 (CONTAINS) +Map 16 <- Union 17 (CONTAINS) +Map 23 <- Union 17 (CONTAINS) +Map 24 <- Union 19 (CONTAINS) +Map 31 <- Union 32 (CONTAINS) +Map 38 <- Union 32 (CONTAINS) +Map 39 <- Union 34 (CONTAINS) +Map 40 <- Union 36 (CONTAINS) +Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 11 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 12 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) +Reducer 18 <- Union 17 (SIMPLE_EDGE), Union 19 (CONTAINS) +Reducer 20 <- Union 19 (SIMPLE_EDGE) +Reducer 21 <- Map 25 (SIMPLE_EDGE), Reducer 20 (SIMPLE_EDGE) +Reducer 22 <- Map 26 (SIMPLE_EDGE), Reducer 21 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 28 <- Map 27 (SIMPLE_EDGE), Map 30 (SIMPLE_EDGE) +Reducer 29 <- Reducer 28 (SIMPLE_EDGE), Reducer 37 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 3 <- Union 2 (SIMPLE_EDGE) -Reducer 30 <- Union 29 (SIMPLE_EDGE), Union 31 (CONTAINS) -Reducer 32 <- Union 31 (SIMPLE_EDGE), Union 33 (CONTAINS) -Reducer 34 <- Union 33 (SIMPLE_EDGE) -Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) -Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) +Reducer 33 <- Union 32 (SIMPLE_EDGE), Union 34 (CONTAINS) +Reducer 35 <- Union 34 (SIMPLE_EDGE), Union 36 (CONTAINS) +Reducer 37 <- Union 36 (SIMPLE_EDGE) +Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) +Reducer 5 <- Map 15 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE), Union 6 (CONTAINS) Reducer 7 <- Union 6 (SIMPLE_EDGE), Union 8 (CONTAINS) Reducer 9 <- Union 8 (SIMPLE_EDGE) -Stage-5 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.a"} - Stage-4 - Dependency Collection{} - Stage-3 - Reducer 9 - File Output Operator [FS_115] - table:{"name:":"default.a"} - Group By Operator [GBY_112] (rows=6300/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - 
<-Union 8 [SIMPLE_EDGE] - <-Reducer 26 [CONTAINS] - Reduce Output Operator [RS_111] - PartitionCols:_col0, _col1 - Select Operator [SEL_107] (rows=313/304 width=175) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_164] (rows=313/304 width=175) - Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] - <-Reducer 25 [SIMPLE_EDGE] - SHUFFLE [RS_104] - PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_163] (rows=44/115 width=264) - Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] - <-Map 24 [SIMPLE_EDGE] - SHUFFLE [RS_101] - PartitionCols:_col0 - Select Operator [SEL_67] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_153] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_65] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Map 27 [SIMPLE_EDGE] - SHUFFLE [RS_102] - PartitionCols:_col0 - Select Operator [SEL_70] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_154] (rows=25/25 width=175) - predicate:key is not null - TableScan [TS_68] (rows=25/25 width=175) - default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 34 [SIMPLE_EDGE] - SHUFFLE [RS_105] - PartitionCols:_col1 - Select Operator [SEL_100] (rows=1525/319 width=178) - Output:["_col1"] - Group By Operator [GBY_99] (rows=1525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 33 [SIMPLE_EDGE] - <-Map 37 [CONTAINS] - Reduce Output Operator [RS_98] - PartitionCols:_col1, _col0 - Select Operator [SEL_94] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_158] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_92] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 32 [CONTAINS] - Reduce Output Operator [RS_98] - PartitionCols:_col1, _col0 - Select Operator [SEL_91] (rows=1025/319 width=178) - Output:["_col0","_col1"] - Group By Operator [GBY_90] (rows=1025/319 width=178) +Stage-8 + Column Stats Work{} + Stage-5 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.a"} + Stage-4 + Dependency Collection{} + Stage-3 + Reducer 10 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 9 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_115] + table:{"name:":"default.a"} + Group By Operator [GBY_112] (rows=6300/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 8 [SIMPLE_EDGE] + <-Reducer 29 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Select Operator [SEL_107] (rows=313/304 width=175) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_164] (rows=313/304 width=175) + Conds:RS_104._col1=RS_105._col1(Inner),Output:["_col0","_col3"] + <-Reducer 28 [SIMPLE_EDGE] + SHUFFLE [RS_104] + PartitionCols:_col1 + Merge Join Operator [MERGEJOIN_163] (rows=44/115 width=264) + Conds:RS_101._col0=RS_102._col0(Inner),Output:["_col0","_col1","_col3"] + <-Map 27 [SIMPLE_EDGE] + SHUFFLE [RS_101] + PartitionCols:_col0 + Select Operator [SEL_67] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_153] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_65] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Map 30 [SIMPLE_EDGE] + SHUFFLE [RS_102] + PartitionCols:_col0 + 
Select Operator [SEL_70] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_154] (rows=25/25 width=175) + predicate:key is not null + TableScan [TS_68] (rows=25/25 width=175) + default@src1,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 37 [SIMPLE_EDGE] + SHUFFLE [RS_105] + PartitionCols:_col1 + Select Operator [SEL_100] (rows=1525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_99] (rows=1525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 31 [SIMPLE_EDGE] - <-Map 36 [CONTAINS] - Reduce Output Operator [RS_89] + <-Union 36 [SIMPLE_EDGE] + <-Map 40 [CONTAINS] + Reduce Output Operator [RS_98] PartitionCols:_col1, _col0 - Select Operator [SEL_85] (rows=500/500 width=178) + Select Operator [SEL_94] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_157] (rows=500/500 width=178) + Filter Operator [FIL_158] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_83] (rows=500/500 width=178) + TableScan [TS_92] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 30 [CONTAINS] - Reduce Output Operator [RS_89] + <-Reducer 35 [CONTAINS] + Reduce Output Operator [RS_98] PartitionCols:_col1, _col0 - Select Operator [SEL_82] (rows=525/319 width=178) + Select Operator [SEL_91] (rows=1025/319 width=178) Output:["_col0","_col1"] - Group By Operator [GBY_81] (rows=525/319 width=178) + Group By Operator [GBY_90] (rows=1025/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 29 [SIMPLE_EDGE] - <-Map 28 [CONTAINS] - Reduce Output Operator [RS_80] + <-Union 34 [SIMPLE_EDGE] + <-Map 39 [CONTAINS] + Reduce Output Operator [RS_89] PartitionCols:_col1, _col0 - Select Operator [SEL_73] (rows=25/25 width=175) + Select Operator [SEL_85] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_155] (rows=25/25 width=175) + Filter Operator [FIL_157] (rows=500/500 width=178) predicate:value is not null - TableScan [TS_71] (rows=25/25 width=175) + TableScan [TS_83] (rows=500/500 width=178) Output:["key","value"] - <-Map 35 [CONTAINS] - Reduce Output Operator [RS_80] + <-Reducer 33 [CONTAINS] + Reduce Output Operator [RS_89] PartitionCols:_col1, _col0 - Select Operator [SEL_76] (rows=500/500 width=178) + Select Operator [SEL_82] (rows=525/319 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_156] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_74] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 7 [CONTAINS] - Reduce Output Operator [RS_111] - PartitionCols:_col0, _col1 - Group By Operator [GBY_63] (rows=5987/309 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 6 [SIMPLE_EDGE] - <-Reducer 19 [CONTAINS] - Reduce Output Operator [RS_62] - PartitionCols:_col0, _col1 - Select Operator [SEL_58] (rows=5839/1056 width=178) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_162] (rows=5839/1056 width=178) - Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] - <-Map 23 [SIMPLE_EDGE] - SHUFFLE [RS_56] - PartitionCols:_col0 - Select Operator [SEL_51] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_152] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_49] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 18 [SIMPLE_EDGE] - SHUFFLE [RS_55] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_161] (rows=2394/512 width=87) - Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] - <-Map 22 
[SIMPLE_EDGE] - SHUFFLE [RS_53] - PartitionCols:_col1 - Select Operator [SEL_48] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_151] (rows=500/500 width=178) - predicate:(key is not null and value is not null) - TableScan [TS_46] (rows=500/500 width=178) - default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 17 [SIMPLE_EDGE] - SHUFFLE [RS_52] - PartitionCols:_col1 - Select Operator [SEL_45] (rows=1025/319 width=178) - Output:["_col1"] - Group By Operator [GBY_44] (rows=1025/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 16 [SIMPLE_EDGE] - <-Map 21 [CONTAINS] - Reduce Output Operator [RS_43] - PartitionCols:_col1, _col0 - Select Operator [SEL_39] (rows=500/500 width=178) + Group By Operator [GBY_81] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 32 [SIMPLE_EDGE] + <-Map 31 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_73] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_155] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_71] (rows=25/25 width=175) + Output:["key","value"] + <-Map 38 [CONTAINS] + Reduce Output Operator [RS_80] + PartitionCols:_col1, _col0 + Select Operator [SEL_76] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_156] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_74] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 7 [CONTAINS] + Reduce Output Operator [RS_111] + PartitionCols:_col0, _col1 + Group By Operator [GBY_63] (rows=5987/309 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 6 [SIMPLE_EDGE] + <-Reducer 22 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_58] (rows=5839/1056 width=178) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_162] (rows=5839/1056 width=178) + Conds:RS_55._col2=RS_56._col0(Inner),Output:["_col2","_col5"] + <-Map 26 [SIMPLE_EDGE] + SHUFFLE [RS_56] + PartitionCols:_col0 + Select Operator [SEL_51] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_152] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_49] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 21 [SIMPLE_EDGE] + SHUFFLE [RS_55] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_161] (rows=2394/512 width=87) + Conds:RS_52._col1=RS_53._col1(Inner),Output:["_col2"] + <-Map 25 [SIMPLE_EDGE] + SHUFFLE [RS_53] + PartitionCols:_col1 + Select Operator [SEL_48] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_150] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_37] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 15 [CONTAINS] - Reduce Output Operator [RS_43] - PartitionCols:_col1, _col0 - Select Operator [SEL_36] (rows=525/319 width=178) + Filter Operator [FIL_151] (rows=500/500 width=178) + predicate:(key is not null and value is not null) + TableScan [TS_46] (rows=500/500 width=178) + default@src,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 20 [SIMPLE_EDGE] + SHUFFLE [RS_52] + PartitionCols:_col1 + Select Operator [SEL_45] (rows=1025/319 width=178) + Output:["_col1"] + Group By Operator [GBY_44] (rows=1025/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 19 [SIMPLE_EDGE] + <-Map 24 [CONTAINS] + Reduce Output Operator [RS_43] + 
PartitionCols:_col1, _col0 + Select Operator [SEL_39] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_150] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_37] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 18 [CONTAINS] + Reduce Output Operator [RS_43] + PartitionCols:_col1, _col0 + Select Operator [SEL_36] (rows=525/319 width=178) + Output:["_col0","_col1"] + Group By Operator [GBY_35] (rows=525/319 width=178) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 17 [SIMPLE_EDGE] + <-Map 16 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_27] (rows=25/25 width=175) + Output:["_col0","_col1"] + Filter Operator [FIL_148] (rows=25/25 width=175) + predicate:value is not null + TableScan [TS_25] (rows=25/25 width=175) + Output:["key","value"] + <-Map 23 [CONTAINS] + Reduce Output Operator [RS_34] + PartitionCols:_col1, _col0 + Select Operator [SEL_30] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_149] (rows=500/500 width=178) + predicate:value is not null + TableScan [TS_28] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 5 [CONTAINS] + Reduce Output Operator [RS_62] + PartitionCols:_col0, _col1 + Select Operator [SEL_24] (rows=148/61 width=177) + Output:["_col0","_col1"] + Merge Join Operator [MERGEJOIN_160] (rows=148/61 width=177) + Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] + <-Map 15 [SIMPLE_EDGE] + SHUFFLE [RS_22] + PartitionCols:_col0 + Select Operator [SEL_17] (rows=500/500 width=178) + Output:["_col0","_col1"] + Filter Operator [FIL_147] (rows=500/500 width=178) + predicate:key is not null + TableScan [TS_15] (rows=500/500 width=178) + default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 4 [SIMPLE_EDGE] + SHUFFLE [RS_21] + PartitionCols:_col2 + Merge Join Operator [MERGEJOIN_159] (rows=61/52 width=86) + Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] + <-Map 14 [SIMPLE_EDGE] + SHUFFLE [RS_19] + PartitionCols:_col1 + Select Operator [SEL_14] (rows=25/25 width=175) Output:["_col0","_col1"] - Group By Operator [GBY_35] (rows=525/319 width=178) + Filter Operator [FIL_146] (rows=25/25 width=175) + predicate:(key is not null and value is not null) + TableScan [TS_12] (rows=25/25 width=175) + default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Reducer 3 [SIMPLE_EDGE] + SHUFFLE [RS_18] + PartitionCols:_col1 + Select Operator [SEL_11] (rows=525/319 width=178) + Output:["_col1"] + Group By Operator [GBY_10] (rows=525/319 width=178) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 14 [SIMPLE_EDGE] - <-Map 13 [CONTAINS] - Reduce Output Operator [RS_34] + <-Union 2 [SIMPLE_EDGE] + <-Map 1 [CONTAINS] + Reduce Output Operator [RS_9] PartitionCols:_col1, _col0 - Select Operator [SEL_27] (rows=25/25 width=175) + Select Operator [SEL_2] (rows=25/25 width=175) Output:["_col0","_col1"] - Filter Operator [FIL_148] (rows=25/25 width=175) + Filter Operator [FIL_144] (rows=25/25 width=175) predicate:value is not null - TableScan [TS_25] (rows=25/25 width=175) + TableScan [TS_0] (rows=25/25 width=175) Output:["key","value"] - <-Map 20 [CONTAINS] - Reduce Output Operator [RS_34] + <-Map 13 [CONTAINS] + Reduce Output Operator [RS_9] PartitionCols:_col1, _col0 - Select Operator [SEL_30] (rows=500/500 width=178) + Select Operator [SEL_5] (rows=500/500 width=178) Output:["_col0","_col1"] - Filter Operator [FIL_149] (rows=500/500 width=178) + Filter Operator [FIL_145] (rows=500/500 
width=178) predicate:value is not null - TableScan [TS_28] (rows=500/500 width=178) + TableScan [TS_3] (rows=500/500 width=178) Output:["key","value"] - <-Reducer 5 [CONTAINS] - Reduce Output Operator [RS_62] - PartitionCols:_col0, _col1 - Select Operator [SEL_24] (rows=148/61 width=177) - Output:["_col0","_col1"] - Merge Join Operator [MERGEJOIN_160] (rows=148/61 width=177) - Conds:RS_21._col2=RS_22._col0(Inner),Output:["_col2","_col5"] - <-Map 12 [SIMPLE_EDGE] - SHUFFLE [RS_22] - PartitionCols:_col0 - Select Operator [SEL_17] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_147] (rows=500/500 width=178) - predicate:key is not null - TableScan [TS_15] (rows=500/500 width=178) - default@src,y,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_21] - PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_159] (rows=61/52 width=86) - Conds:RS_18._col1=RS_19._col1(Inner),Output:["_col2"] - <-Map 11 [SIMPLE_EDGE] - SHUFFLE [RS_19] - PartitionCols:_col1 - Select Operator [SEL_14] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_146] (rows=25/25 width=175) - predicate:(key is not null and value is not null) - TableScan [TS_12] (rows=25/25 width=175) - default@src1,x,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] - <-Reducer 3 [SIMPLE_EDGE] - SHUFFLE [RS_18] - PartitionCols:_col1 - Select Operator [SEL_11] (rows=525/319 width=178) - Output:["_col1"] - Group By Operator [GBY_10] (rows=525/319 width=178) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 2 [SIMPLE_EDGE] - <-Map 1 [CONTAINS] - Reduce Output Operator [RS_9] - PartitionCols:_col1, _col0 - Select Operator [SEL_2] (rows=25/25 width=175) - Output:["_col0","_col1"] - Filter Operator [FIL_144] (rows=25/25 width=175) - predicate:value is not null - TableScan [TS_0] (rows=25/25 width=175) - Output:["key","value"] - <-Map 10 [CONTAINS] - Reduce Output Operator [RS_9] - PartitionCols:_col1, _col0 - Select Operator [SEL_5] (rows=500/500 width=178) - Output:["_col0","_col1"] - Filter Operator [FIL_145] (rows=500/500 width=178) - predicate:value is not null - TableScan [TS_3] (rows=500/500 width=178) - Output:["key","value"] - File Output Operator [FS_117] - table:{"name:":"default.b"} - Please refer to the previous Group By Operator [GBY_112] - File Output Operator [FS_119] - table:{"name:":"default.c"} - Please refer to the previous Group By Operator [GBY_112] -Stage-6 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.b"} - Please refer to the previous Stage-4 -Stage-7 - Stats-Aggr Operator - Stage-2 - Move Operator - table:{"name:":"default.c"} - Please refer to the previous Stage-4 + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] + File Output Operator [FS_117] + table:{"name:":"default.b"} + Please refer to the previous Group By Operator [GBY_112] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] + File Output Operator [FS_119] + table:{"name:":"default.c"} + Please refer to the previous Group By Operator [GBY_112] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=6300/319 width=178) + Output:["key","value"] + Please refer to the previous Group By Operator [GBY_112] + Reducer 11 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + 
Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Reducer 9 [CUSTOM_SIMPLE_EDGE] + Reducer 12 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <- Please refer to the previous Reducer 9 [CUSTOM_SIMPLE_EDGE] + Stage-6 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.b"} + Please refer to the previous Stage-4 + Stage-7 + Stats-Aggr Operator + Stage-2 + Move Operator + table:{"name:":"default.c"} + Please refer to the previous Stage-4 +Stage-9 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 +Stage-10 + Column Stats Work{} + Please refer to the previous Stage-5 + Please refer to the previous Stage-6 + Please refer to the previous Stage-7 PREHOOK: query: CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE PREHOOK: type: CREATETABLE @@ -1764,64 +1870,89 @@ POSTHOOK: type: QUERY Plan optimized by CBO. Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (CUSTOM_SIMPLE_EDGE) +Reducer 7 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 5 - File Output Operator [FS_19] - table:{"name:":"default.dest1"} - Select Operator [SEL_17] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_16] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Reducer 4 [SIMPLE_EDGE] - SHUFFLE [RS_15] - PartitionCols:_col0 - Group By Operator [GBY_12] (rows=501/310 width=272) - Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0, _col1 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0, _col1 - Select Operator [SEL_5] (rows=1/1 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - File Output Operator [FS_25] - table:{"name:":"default.dest2"} - Select Operator [SEL_23] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_22] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 - Please refer to the previous Group By Operator [GBY_12] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 6 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) 
+ Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 5 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_19] + table:{"name:":"default.dest1"} + Select Operator [SEL_17] (rows=205/310 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_16] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Reducer 4 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_15] + PartitionCols:_col0 + Group By Operator [GBY_12] (rows=501/310 width=272) + Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0, _col1 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + File Output Operator [FS_25] + table:{"name:":"default.dest2"} + Select Operator [SEL_23] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_22] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 + Please refer to the previous Group By Operator [GBY_12] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_23] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_17] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <- Please refer to the previous Reducer 4 [CUSTOM_SIMPLE_EDGE] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key PREHOOK: type: QUERY @@ -1995,77 +2126,101 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) -Map 7 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) +Map 9 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 - File Output Operator [FS_18] - table:{"name:":"default.dest1"} - Select Operator [SEL_16] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_15] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_7] - <-Map 7 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_11] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_10] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_11] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_14] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=1/1 width=272) +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_18] + table:{"name:":"default.dest1"} + Select Operator [SEL_16] (rows=205/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_20] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_5] - Reducer 5 - File Output Operator [FS_24] - table:{"name:":"default.dest2"} - Select Operator [SEL_22] (rows=1001/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_21] (rows=1001/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Group By Operator [GBY_15] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce 
Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_7] + <-Map 9 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_11] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_10] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_11] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_14] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_20] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_16] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_24] + table:{"name:":"default.dest2"} + Select Operator [SEL_22] (rows=1001/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_21] (rows=1001/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=1001/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_22] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 PREHOOK: query: FROM ( select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1 @@ -2114,64 +2269,88 @@ POSTHOOK: type: QUERY Plan optimized by CBO. 
Vertex dependency in root stage -Map 6 <- Union 3 (CONTAINS) +Map 8 <- Union 3 (CONTAINS) Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) -Reducer 5 <- Union 3 (SIMPLE_EDGE) +Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE) +Reducer 6 <- Union 3 (SIMPLE_EDGE) +Reducer 7 <- Reducer 6 (CUSTOM_SIMPLE_EDGE) -Stage-4 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.dest1"} - Stage-3 - Dependency Collection{} - Stage-2 - Reducer 4 - File Output Operator [FS_15] - table:{"name:":"default.dest1"} - Select Operator [SEL_13] (rows=205/310 width=272) - Output:["_col0","_col1"] - Group By Operator [GBY_12] (rows=205/310 width=96) - Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 - <-Union 3 [SIMPLE_EDGE] - <-Map 6 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0 - Select Operator [SEL_7] (rows=500/500 width=178) - Output:["_col0","_col1"] - TableScan [TS_6] (rows=500/500 width=178) - Output:["key","value"] - Reduce Output Operator [RS_17] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_7] - <-Reducer 2 [CONTAINS] - Reduce Output Operator [RS_11] - PartitionCols:_col0 - Select Operator [SEL_5] (rows=1/1 width=272) +Stage-6 + Column Stats Work{} + Stage-4 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.dest1"} + Stage-3 + Dependency Collection{} + Stage-2 + Reducer 5 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=960) + Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)"] + <-Reducer 4 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_15] + table:{"name:":"default.dest1"} + Select Operator [SEL_13] (rows=205/310 width=272) Output:["_col0","_col1"] - Group By Operator [GBY_4] (rows=1/1 width=8) - Output:["_col0"],aggregations:["count(1)"] - <-Map 1 [CUSTOM_SIMPLE_EDGE] - PARTITION_ONLY_SHUFFLE [RS_3] - Select Operator [SEL_1] (rows=500/500 width=10) - TableScan [TS_0] (rows=500/500 width=10) - default@src,s1,Tbl:COMPLETE,Col:COMPLETE - Reduce Output Operator [RS_17] - PartitionCols:_col0, _col1 - Please refer to the previous Select Operator [SEL_5] - Reducer 5 - File Output Operator [FS_21] - table:{"name:":"default.dest2"} - Select Operator [SEL_19] (rows=501/310 width=456) - Output:["_col0","_col1","_col2"] - Group By Operator [GBY_18] (rows=501/310 width=280) - Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 - <- Please refer to the previous Union 3 [SIMPLE_EDGE] -Stage-5 - Stats-Aggr Operator - Stage-1 - Move Operator - table:{"name:":"default.dest2"} - Please refer to the previous Stage-3 + Group By Operator [GBY_12] (rows=205/310 width=96) + Output:["_col0","_col1"],aggregations:["count(DISTINCT KEY._col1:0._col0)"],keys:KEY._col0 + <-Union 3 [SIMPLE_EDGE] + <-Map 8 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0 + Select Operator [SEL_7] (rows=500/500 width=178) + Output:["_col0","_col1"] + TableScan [TS_6] (rows=500/500 width=178) + Output:["key","value"] + Reduce Output Operator [RS_17] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_7] + <-Reducer 2 [CONTAINS] + Reduce Output Operator [RS_11] + PartitionCols:_col0 + Select Operator [SEL_5] (rows=1/1 width=272) + Output:["_col0","_col1"] + Group By Operator [GBY_4] (rows=1/1 width=8) + Output:["_col0"],aggregations:["count(1)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + 
PARTITION_ONLY_SHUFFLE [RS_3] + Select Operator [SEL_1] (rows=500/310 width=10) + TableScan [TS_0] (rows=500/500 width=10) + default@src,s1,Tbl:COMPLETE,Col:COMPLETE + Reduce Output Operator [RS_17] + PartitionCols:_col0, _col1 + Please refer to the previous Select Operator [SEL_5] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=205/310 width=272) + Output:["key","value"] + Please refer to the previous Select Operator [SEL_13] + Reducer 7 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=1440) + Output:["_col0","_col1","_col2"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)"] + <-Reducer 6 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_21] + table:{"name:":"default.dest2"} + Select Operator [SEL_19] (rows=501/310 width=456) + Output:["_col0","_col1","_col2"] + Group By Operator [GBY_18] (rows=501/310 width=280) + Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col2:0._col0)"],keys:KEY._col0, KEY._col1 + <- Please refer to the previous Union 3 [SIMPLE_EDGE] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=501/310 width=456) + Output:["key","val1","val2"] + Please refer to the previous Select Operator [SEL_19] + Stage-5 + Stats-Aggr Operator + Stage-1 + Move Operator + table:{"name:":"default.dest2"} + Please refer to the previous Stage-3 +Stage-7 + Column Stats Work{} + Please refer to the previous Stage-4 + Please refer to the previous Stage-5 diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 087f916..a5aa12e 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -652,35 +652,48 @@ POSTHOOK: query: explain analyze insert overwrite table orc_merge5 select userid POSTHOOK: type: QUERY Plan optimized by CBO. 
-Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.orc_merge5"} - Stage-2 - Dependency Collection{} - Stage-5(CONDITIONAL) - Move Operator - Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Conditional Operator - Stage-1 - Map 1 - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5"} - Select Operator [SEL_2] (rows=306/3 width=268) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_4] (rows=306/3 width=268) - predicate:(userid <= 13) - TableScan [TS_0] (rows=919/15000 width=268) - default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - Stage-4(CONDITIONAL) - File Merge - Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Stage-7 - Move Operator - Stage-6(CONDITIONAL) +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-9 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.orc_merge5"} + Stage-2 + Dependency Collection{} + Stage-5(CONDITIONAL) + Move Operator + Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Conditional Operator + Stage-1 + Reducer 2 + File Output Operator [FS_5] + Group By Operator [GBY_3] (rows=1/1 width=2608) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 16)","compute_stats(VALUE._col2, 16)","compute_stats(VALUE._col3, 16)","compute_stats(VALUE._col4, 16)","compute_stats(VALUE._col5, 16)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5"} + Select Operator [SEL_2] (rows=306/3 width=268) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_4] (rows=306/3 width=268) + predicate:(userid <= 13) + TableScan [TS_0] (rows=919/15000 width=268) + default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_2] + Select Operator [SEL_1] (rows=306/3 width=268) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] + Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Stage-7 + Move Operator + Stage-6(CONDITIONAL) + File Merge + Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) PREHOOK: query: drop table orc_merge5 PREHOOK: type: DROPTABLE @@ -835,23 +848,23 @@ Stage-0 Stage-1 Map 2 File Output Operator [FS_10] - Select Operator [SEL_9] (rows=550/480 width=18) + Select Operator [SEL_9] (rows=617/480 width=186) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_25] (rows=550/480 width=18) + Map Join Operator [MAPJOIN_25] (rows=617/480 width=186) BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] MULTICAST [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=242/242 width=18) + Select Operator [SEL_2] (rows=242/242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242/242 width=18) + Filter Operator [FIL_13] (rows=242/242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242/242 width=18) - default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_5] (rows=500/500 width=18) + TableScan [TS_0] (rows=242/242 width=95) + default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_5] 
(rows=500/500 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_14] (rows=500/500 width=18) + Filter Operator [FIL_14] (rows=500/500 width=95) predicate:key is not null - TableScan [TS_3] (rows=500/500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_3] (rows=500/500 width=95) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index cbfb33a..85178ea 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ -496,35 +496,50 @@ POSTHOOK: query: explain insert overwrite table orc_merge5 select userid,string1 POSTHOOK: type: QUERY Plan optimized by CBO. -Stage-3 - Stats-Aggr Operator - Stage-0 - Move Operator - table:{"name:":"default.orc_merge5"} - Stage-2 - Dependency Collection{} - Stage-5(CONDITIONAL) - Move Operator - Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Conditional Operator - Stage-1 - Map 1 vectorized - File Output Operator [FS_3] - table:{"name:":"default.orc_merge5"} - Select Operator [SEL_2] (rows=306 width=268) - Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_4] (rows=306 width=268) - predicate:(userid <= 13) - TableScan [TS_0] (rows=919 width=268) - default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] - Stage-4(CONDITIONAL) - File Merge - Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) - Stage-7 - Move Operator - Stage-6(CONDITIONAL) +Vertex dependency in root stage +Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + +Stage-9 + Column Stats Work{} + Stage-3 + Stats-Aggr Operator + Stage-0 + Move Operator + table:{"name:":"default.orc_merge5"} + Stage-2 + Dependency Collection{} + Stage-5(CONDITIONAL) + Move Operator + Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Conditional Operator + Stage-1 + Reducer 2 + File Output Operator [FS_6] + Group By Operator [GBY_4] (rows=1 width=2620) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)"] + <-Map 1 [CUSTOM_SIMPLE_EDGE] + File Output Operator [FS_3] + table:{"name:":"default.orc_merge5"} + Select Operator [SEL_2] (rows=306 width=268) + Output:["_col0","_col1","_col2","_col3","_col4"] + Filter Operator [FIL_4] (rows=306 width=268) + predicate:(userid <= 13) + TableScan [TS_0] (rows=919 width=268) + default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] + PARTITION_ONLY_SHUFFLE [RS_3] + Group By Operator [GBY_2] (rows=1 width=2604) + Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(userid, 16)","compute_stats(string1, 16)","compute_stats(subtype, 16)","compute_stats(decimal1, 16)","compute_stats(ts, 16)"] + Select Operator [SEL_1] (rows=306 width=268) + Output:["userid","string1","subtype","decimal1","ts"] + Please refer to the previous Select Operator [SEL_2] + Stage-4(CONDITIONAL) File Merge Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) + Stage-7 + Move Operator + Stage-6(CONDITIONAL) + File Merge + Please refer to the previous Stage-8(CONDITIONAL CHILD TASKS: Stage-5, Stage-4, Stage-6) PREHOOK: query: drop table orc_merge5 PREHOOK: 
type: DROPTABLE @@ -663,23 +678,23 @@ Stage-0 Stage-1 Map 2 vectorized File Output Operator [FS_10] - Select Operator [SEL_9] (rows=550 width=18) + Select Operator [SEL_9] (rows=617 width=186) Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_25] (rows=550 width=18) + Map Join Operator [MAPJOIN_25] (rows=617 width=186) BucketMapJoin:true,Conds:RS_6._col0=SEL_5._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"] <-Map 1 [CUSTOM_EDGE] vectorized MULTICAST [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=242 width=18) + Select Operator [SEL_2] (rows=242 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_13] (rows=242 width=18) + Filter Operator [FIL_13] (rows=242 width=95) predicate:key is not null - TableScan [TS_0] (rows=242 width=18) - default@tab,a,Tbl:COMPLETE,Col:NONE,Output:["key","value"] - <-Select Operator [SEL_5] (rows=500 width=18) + TableScan [TS_0] (rows=242 width=95) + default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] + <-Select Operator [SEL_5] (rows=500 width=95) Output:["_col0","_col1"] - Filter Operator [FIL_14] (rows=500 width=18) + Filter Operator [FIL_14] (rows=500 width=95) predicate:key is not null - TableScan [TS_3] (rows=500 width=18) - default@tab_part,b,Tbl:COMPLETE,Col:NONE,Output:["key","value"] + TableScan [TS_3] (rows=500 width=95) + default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"] diff --git a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out index f3a5dc3..e9a5bad 100644 --- a/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out +++ b/ql/src/test/results/clientpositive/tez/hybridgrace_hashjoin_1.q.out @@ -741,14 +741,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -757,43 +757,43 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - 
Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -857,14 +857,14 @@ STAGE PLANS: Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -873,44 +873,44 @@ STAGE PLANS: 1 _col0 (type: string) input vertices: 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE HybridGraceHashJoin: true Group By Operator aggregations: count() mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Map 3 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 
Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -966,67 +966,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 
(type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1082,68 +1087,72 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: p1 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - input vertices: - 1 Map 3 - Statistics: Num rows: 577 Data size: 18341 Basic stats: COMPLETE Column stats: NONE - HybridGraceHashJoin: true - Group By Operator - aggregations: count() - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint) - Map 3 + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE + Map 4 Map Operator Tree: TableScan alias: p2 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: key is 
not null (type: boolean) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: key (type: string) outputColumnNames: _col0 - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 525 Data size: 16674 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 525 Data size: 45675 Basic stats: COMPLETE Column stats: COMPLETE Reducer 2 Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Statistics: Num rows: 1344 Data size: 10752 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out b/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out index 82c09fa..7f7d354 100644 --- a/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out +++ b/ql/src/test/results/clientpositive/tez/multi_count_distinct.q.out @@ -41,19 +41,19 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_9] Group By Operator [GBY_8] (rows=1 width=24) Output:["_col0","_col1","_col2"],aggregations:["count(_col0)","count(_col1)","count(_col2)"] - Select Operator [SEL_6] (rows=13 width=5) + Select Operator [SEL_6] (rows=13 width=97) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=13 width=5) + Group By Operator [GBY_5] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=27 width=5) + Group By Operator [GBY_3] (rows=13 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=5) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=5) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["department_id","gender","education_level"] + TableScan [TS_0] (rows=9 width=93) + default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["department_id","gender","education_level"] PREHOOK: query: select count(distinct department_id), count(distinct gender), count(distinct education_level) from employee 
PREHOOK: type: QUERY @@ -113,7 +113,7 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=1 width=40) + Select Operator [SEL_11] (rows=1 width=56) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] Group By Operator [GBY_10] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(VALUE._col0)","count(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","count(VALUE._col4)"] @@ -121,19 +121,19 @@ Stage-0 PARTITION_ONLY_SHUFFLE [RS_9] Group By Operator [GBY_8] (rows=1 width=40) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["count(_col0)","count(_col1)","count(_col2)","count(_col3)","count(_col4)"] - Select Operator [SEL_6] (rows=22 width=5) + Select Operator [SEL_6] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3","_col4"] - Group By Operator [GBY_5] (rows=22 width=5) + Group By Operator [GBY_5] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2, _col3 - Group By Operator [GBY_3] (rows=45 width=5) + Group By Operator [GBY_3] (rows=22 width=97) Output:["_col0","_col1","_col2","_col3"],keys:_col0, _col1, _col2, 0 - Select Operator [SEL_1] (rows=9 width=5) + Select Operator [SEL_1] (rows=9 width=93) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=9 width=5) - default@employee,employee,Tbl:COMPLETE,Col:NONE,Output:["gender","department_id","education_level"] + TableScan [TS_0] (rows=9 width=93) + default@employee,employee,Tbl:COMPLETE,Col:COMPLETE,Output:["gender","department_id","education_level"] PREHOOK: query: select count(distinct gender), count(distinct department_id), count(distinct gender), count(distinct education_level), count(distinct education_level, department_id), count(distinct department_id, education_level), count(distinct department_id, education_level, gender) from employee diff --git a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out index df79710..442badf 100644 --- a/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_join_part_col_char.q.out @@ -110,28 +110,28 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_10] - Merge Join Operator [MERGEJOIN_21] (rows=2 width=431) + Merge Join Operator [MERGEJOIN_21] (rows=2 width=429) Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_6] PartitionCols:_col2 - Select Operator [SEL_2] (rows=2 width=134) + Select Operator [SEL_2] (rows=2 width=237) Output:["_col0","_col1","_col2"] - TableScan [TS_0] (rows=2 width=236) - default@char_tbl1,c1,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] - Dynamic Partitioning Event Operator [EVENT_20] (rows=1 width=134) - Group By Operator [GBY_19] (rows=1 width=134) + TableScan [TS_0] (rows=2 width=237) + default@char_tbl1,c1,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] + Dynamic Partitioning Event Operator [EVENT_20] (rows=1 width=237) + Group By Operator [GBY_19] (rows=1 width=237) Output:["_col0"],keys:_col0 - Select Operator [SEL_18] (rows=2 width=134) + Select Operator [SEL_18] (rows=2 width=237) Output:["_col0"] Please refer to the previous Select Operator [SEL_2] <-Map 3 [SIMPLE_EDGE] vectorized SHUFFLE [RS_7] PartitionCols:_col2 - Select Operator [SEL_5] (rows=2 width=89) + Select Operator 
[SEL_5] (rows=2 width=192) Output:["_col0","_col1","_col2"] - TableScan [TS_3] (rows=2 width=190) - default@char_tbl2,c2,Tbl:COMPLETE,Col:PARTIAL,Output:["name","age"] + TableScan [TS_3] (rows=2 width=192) + default@char_tbl2,c2,Tbl:COMPLETE,Col:COMPLETE,Output:["name","age"] PREHOOK: query: select c1.name, c1.age, c1.gpa, c2.name, c2.age, c2.gpa from char_tbl1 c1 join char_tbl2 c2 on (c1.gpa = c2.gpa) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out b/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out index 647a095..6983333 100644 --- a/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out +++ b/ql/src/test/results/clientpositive/tez/vector_non_string_partition.q.out @@ -51,7 +51,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: non_string_part - Statistics: Num rows: 3073 Data size: 351442 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 3073 Data size: 24584 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -61,7 +61,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), ctinyint (type: tinyint) outputColumnNames: _col0, _col1 @@ -69,7 +69,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 4] - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) sort order: + @@ -78,7 +78,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: tinyint) Execution mode: vectorized @@ -107,19 +107,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 4096 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1024 Data size: 8192 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -175,7 +175,7 @@ STAGE PLANS: Map Operator Tree: TableScan 
alias: non_string_part - Statistics: Num rows: 3073 Data size: 363734 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3073 Data size: 313446 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true projectedOutputColumns: [0, 1, 2, 3, 4] @@ -185,7 +185,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean predicate: (cint > 0) (type: boolean) - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string) outputColumnNames: _col0, _col1 @@ -193,7 +193,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ @@ -202,7 +202,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: No TopN IS false - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -230,19 +230,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumns: [0, 1] - Statistics: Num rows: 1024 Data size: 121205 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1024 Data size: 104448 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 1180 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10 Data size: 1020 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/truncate_column.q.out b/ql/src/test/results/clientpositive/truncate_column.q.out index cc3bc89..7ff963c 100644 --- a/ql/src/test/results/clientpositive/truncate_column.q.out +++ b/ql/src/test/results/clientpositive/truncate_column.q.out @@ -36,7 +36,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 @@ -297,7 +297,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} #### A masked pattern was here 
#### numFiles 1 numRows 10 @@ -495,7 +495,7 @@ Database: default Table: test_tab_part #### A masked pattern was here #### Partition Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 10 rawDataSize 94 diff --git a/ql/src/test/results/clientpositive/udf1.q.out b/ql/src/test/results/clientpositive/udf1.q.out index eebd90f..91352af 100644 --- a/ql/src/test/results/clientpositive/udf1.q.out +++ b/ql/src/test/results/clientpositive/udf1.q.out @@ -38,6 +38,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -64,6 +65,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string), _col7 (type: string), _col8 (type: string), _col9 (type: string), _col10 (type: string), _col11 (type: string), _col12 (type: string), _col13 (type: string), _col14 (type: string), _col15 (type: string), _col16 (type: string), _col17 (type: string), _col18 (type: string), _col19 (type: string) + outputColumnNames: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16), compute_stats(c8, 16), compute_stats(c9, 16), compute_stats(c10, 16), compute_stats(c11, 16), compute_stats(c12, 16), compute_stats(c13, 16), compute_stats(c14, 16), compute_stats(c15, 16), compute_stats(c16, 16), compute_stats(c17, 16), compute_stats(c18, 16), compute_stats(c19, 16), compute_stats(c20, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: struct), _col16 (type: struct), _col17 (type: struct), _col18 (type: struct), _col19 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6), compute_stats(VALUE._col7), compute_stats(VALUE._col8), compute_stats(VALUE._col9), compute_stats(VALUE._col10), compute_stats(VALUE._col11), compute_stats(VALUE._col12), compute_stats(VALUE._col13), compute_stats(VALUE._col14), compute_stats(VALUE._col15), 
compute_stats(VALUE._col16), compute_stats(VALUE._col17), compute_stats(VALUE._col18), compute_stats(VALUE._col19) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 9840 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -87,6 +114,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, c13, c14, c15, c16, c17, c18, c19, c20 + Column Types: string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string, string + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/udf3.q.out b/ql/src/test/results/clientpositive/udf3.q.out index 96038f1..3b59e7f 100644 --- a/ql/src/test/results/clientpositive/udf3.q.out +++ b/ql/src/test/results/clientpositive/udf3.q.out @@ -18,6 +18,7 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-3 depends on stages: Stage-2 STAGE PLANS: Stage: Stage-1 @@ -55,6 +56,26 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 + Select Operator + expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string) + outputColumnNames: c1, c2, c3, c4, c5 + Statistics: Num rows: 1 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 2460 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Move Operator @@ -69,6 +90,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-3 + Column Stats Work + Column Stats Desc: + Columns: c1, c2, c3, c4, c5 + Column Types: string, string, string, string, string + Table: default.dest1 + PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(CAST('' AS INT)), sum(CAST('' AS INT)), avg(CAST('' AS INT)), min(CAST('' AS INT)), max(CAST('' AS INT)) PREHOOK: type: QUERY diff --git 
diff --git a/ql/src/test/results/clientpositive/udf_10_trims.q.out b/ql/src/test/results/clientpositive/udf_10_trims.q.out
index 3a5303a..746a2ae 100644
--- a/ql/src/test/results/clientpositive/udf_10_trims.q.out
+++ b/ql/src/test/results/clientpositive/udf_10_trims.q.out
@@ -24,6 +24,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -50,6 +51,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string)
+              outputColumnNames: c1
+              Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -73,6 +100,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1
+          Column Types: string
+          Table: default.dest1
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/udf_character_length.q.out b/ql/src/test/results/clientpositive/udf_character_length.q.out
index 332ec95..a85225f 100644
--- a/ql/src/test/results/clientpositive/udf_character_length.q.out
+++ b/ql/src/test/results/clientpositive/udf_character_length.q.out
@@ -48,6 +48,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -71,6 +72,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -94,6 +121,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: len
+          Column Types: int
+          Table: default.dest1
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/udf_length.q.out b/ql/src/test/results/clientpositive/udf_length.q.out
index fc795bb..0da98e2 100644
--- a/ql/src/test/results/clientpositive/udf_length.q.out
+++ b/ql/src/test/results/clientpositive/udf_length.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -54,6 +55,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -77,6 +104,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: len
+          Column Types: int
+          Table: default.dest1
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/udf_octet_length.q.out b/ql/src/test/results/clientpositive/udf_octet_length.q.out
index f8738f8..af648fb 100644
--- a/ql/src/test/results/clientpositive/udf_octet_length.q.out
+++ b/ql/src/test/results/clientpositive/udf_octet_length.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -54,6 +55,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: int)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -77,6 +104,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: len
+          Column Types: int
+          Table: default.dest1
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/udf_reverse.q.out b/ql/src/test/results/clientpositive/udf_reverse.q.out
index 28b0c9f..9257b2c 100644
--- a/ql/src/test/results/clientpositive/udf_reverse.q.out
+++ b/ql/src/test/results/clientpositive/udf_reverse.q.out
@@ -31,6 +31,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -54,6 +55,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string)
+              outputColumnNames: len
+              Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(len, 16)
+                mode: hash
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -77,6 +104,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: len
+          Column Types: string
+          Table: default.dest1
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
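[Note: the repeated block in the udf_* files is a two-phase aggregation: a map-side compute_stats(col, 16) in hash mode, an empty-key Reduce Output Operator ("sort order:" with no key columns, forcing everything to one reducer), and a mergepartial Group By on the reduce side. In effect it is the plan a manual stats run would produce; a hedged equivalent for the table these hunks target:

    -- roughly what the auto-gathered pipeline computes for default.dest1
    ANALYZE TABLE dest1 COMPUTE STATISTICS FOR COLUMNS;
]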
diff --git a/ql/src/test/results/clientpositive/union10.q.out b/ql/src/test/results/clientpositive/union10.q.out
index 85eabf5..5337e53 100644
--- a/ql/src/test/results/clientpositive/union10.q.out
+++ b/ql/src/test/results/clientpositive/union10.q.out
@@ -29,6 +29,7 @@ STAGE DEPENDENCIES:
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-3 depends on stages: Stage-0
+  Stage-11 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -88,6 +89,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
@@ -103,6 +117,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
@@ -118,6 +145,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-8
     Conditional Operator
@@ -141,6 +194,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, int
+          Table: default.tmptable
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/union12.q.out b/ql/src/test/results/clientpositive/union12.q.out
index 1b02d16..30654a9 100644
--- a/ql/src/test/results/clientpositive/union12.q.out
+++ b/ql/src/test/results/clientpositive/union12.q.out
@@ -29,6 +29,7 @@ STAGE DEPENDENCIES:
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-3 depends on stages: Stage-0
+  Stage-11 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -88,6 +89,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
@@ -103,6 +117,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 3 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
@@ -118,6 +145,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.tmptable
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, value
+              Statistics: Num rows: 3 Data size: 276 Basic stats: COMPLETE Column stats: COMPLETE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-8
     Conditional Operator
@@ -141,6 +194,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, int
+          Table: default.tmptable
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
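[Note: in union10/union12 each of the three union branches gets its own map-side compute_stats block, and all three feed the single mergepartial reducer. A paraphrased sketch of the query shape, not copied from the .q files — tmptable(key STRING, value INT) matches the Column Stats Desc above:

    FROM (SELECT 'tst1' AS key, CAST(count(1) AS INT) AS value FROM src s1
          UNION ALL
          SELECT 'tst2' AS key, CAST(count(1) AS INT) AS value FROM src s2
          UNION ALL
          SELECT 'tst3' AS key, CAST(count(1) AS INT) AS value FROM src s3) unionsrc
    INSERT OVERWRITE TABLE tmptable
    SELECT unionsrc.key, unionsrc.value;
]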
diff --git a/ql/src/test/results/clientpositive/union17.q.out b/ql/src/test/results/clientpositive/union17.q.out
index 18e06e1..7398d9b 100644
--- a/ql/src/test/results/clientpositive/union17.q.out
+++ b/ql/src/test/results/clientpositive/union17.q.out
@@ -33,9 +33,13 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8
+  Stage-11 depends on stages: Stage-4, Stage-5, Stage-7, Stage-8
   Stage-5 depends on stages: Stage-3
-  Stage-1 depends on stages: Stage-5
-  Stage-6 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-3
+  Stage-1 depends on stages: Stage-6
+  Stage-7 depends on stages: Stage-1
+  Stage-8 depends on stages: Stage-6

 STAGE PLANS:
   Stage: Stage-2
@@ -152,6 +156,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Move Operator
@@ -166,11 +185,47 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
              sort order: +++
              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
@@ -194,6 +249,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 1 Data size: 456 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-1
     Move Operator
@@ -205,9 +275,31 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest2

-  Stage: Stage-6
+  Stage: Stage-7
     Stats-Aggr Operator

+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                      UNION ALL
                      select s2.key as key, s2.value as value from src s2) unionsrc
diff --git a/ql/src/test/results/clientpositive/union18.q.out b/ql/src/test/results/clientpositive/union18.q.out
index caa664c..2a78eaa 100644
--- a/ql/src/test/results/clientpositive/union18.q.out
+++ b/ql/src/test/results/clientpositive/union18.q.out
@@ -35,16 +35,14 @@ STAGE DEPENDENCIES:
   Stage-6
   Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
   Stage-4 depends on stages: Stage-0
+  Stage-13 depends on stages: Stage-4, Stage-10, Stage-11
+  Stage-14 depends on stages: Stage-4, Stage-10, Stage-11
   Stage-5
   Stage-7
   Stage-8 depends on stages: Stage-7
-  Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13
-  Stage-12
-  Stage-1 depends on stages: Stage-12, Stage-11, Stage-14
+  Stage-1 depends on stages: Stage-3
   Stage-10 depends on stages: Stage-1
-  Stage-11
-  Stage-13
-  Stage-14 depends on stages: Stage-13
+  Stage-11 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-2
@@ -96,6 +94,19 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+            Select Operator
              expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
@@ -107,6 +118,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           TableScan
             alias: s2
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -125,6 +151,19 @@ STAGE PLANS:
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 501 Data size: 136272 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+            Select Operator
              expressions: _col0 (type: string), _col1 (type: string), _col1 (type: string)
              outputColumnNames: _col0, _col1, _col2
              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
@@ -136,6 +175,34 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-9
     Conditional Operator
@@ -159,6 +226,20 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

+  Stage: Stage-13
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-14
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
   Stage: Stage-5
     Map Reduce
       Map Operator Tree:
@@ -189,15 +270,6 @@ STAGE PLANS:
           hdfs directory: true
 #### A masked pattern was here ####

-  Stage: Stage-15
-    Conditional Operator
-
-  Stage: Stage-12
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
-
   Stage: Stage-1
     Move Operator
       tables:
@@ -215,31 +287,23 @@ STAGE PLANS:
     Map Reduce
       Map Operator Tree:
           TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-13
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            File Output Operator
-              compressed: false
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: default.dest2
-
-  Stage: Stage-14
-    Move Operator
-      files:
-          hdfs directory: true
-#### A masked pattern was here ####
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                      UNION ALL
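[Note: union17/union18/union19 are multi-insert plans, so the stats work is doubled — one Column Stats Work stage per target (dest1 and dest2) plus one stats reducer each. A paraphrased sketch of the same shape, built from the query visible in the context lines; the dest2 column list mirrors the "expressions: _col0, _col1, _col1" hunks:

    FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
          UNION ALL
          select s2.key as key, s2.value as value from src s2) unionsrc
    INSERT OVERWRITE TABLE dest1 SELECT unionsrc.key, unionsrc.value
    INSERT OVERWRITE TABLE dest2 SELECT unionsrc.key, unionsrc.value, unionsrc.value;
]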
diff --git a/ql/src/test/results/clientpositive/union19.q.out b/ql/src/test/results/clientpositive/union19.q.out
index 5ce5905..a002275 100644
--- a/ql/src/test/results/clientpositive/union19.q.out
+++ b/ql/src/test/results/clientpositive/union19.q.out
@@ -33,8 +33,12 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-5 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-2
@@ -101,6 +105,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
           TableScan
             alias: s2
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -134,6 +153,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest2
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
+              outputColumnNames: key, val1, val2
+              Statistics: Num rows: 501 Data size: 228456 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(val1, 16), compute_stats(val2, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
           aggregations: count(VALUE._col0)
@@ -153,6 +187,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.dest1
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1 Data size: 272 Basic stats: COMPLETE Column stats: PARTIAL
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Move Operator
@@ -167,6 +216,42 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.dest1
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, val1, val2
+          Column Types: string, string, string
+          Table: default.dest2
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -177,9 +262,31 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.dest2

-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator

+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: PARTIAL
+              value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 1440 Basic stats: COMPLETE Column stats: PARTIAL
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
                      UNION ALL
                      select s2.key as key, s2.value as value from src s2) unionsrc
diff --git a/ql/src/test/results/clientpositive/union22.q.out b/ql/src/test/results/clientpositive/union22.q.out
index f60dbf6..3106201 100644
--- a/ql/src/test/results/clientpositive/union22.q.out
+++ b/ql/src/test/results/clientpositive/union22.q.out
@@ -77,6 +77,7 @@ STAGE DEPENDENCIES:
   Stage-2 depends on stages: Stage-4, Stage-6
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3
   Stage-4

 STAGE PLANS:
@@ -97,7 +98,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k0,k1,k2,k3,k4,k5
@@ -225,7 +226,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k1,k2,k3,k4
@@ -272,7 +273,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k0,k1,k2,k3,k4,k5
@@ -361,6 +362,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string)
+              outputColumnNames: k1, k2, k3, k4, ds
+              Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16)
+                keys: ds (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+                  auto parallelism: false
           TableScan
             GatherStats: false
             Union
@@ -395,6 +415,25 @@ STAGE PLANS:
              TotalFiles: 1
              GatherStats: true
              MultiFileSpray: false
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), '2' (type: string)
+              outputColumnNames: k1, k2, k3, k4, ds
+              Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(k1, 16), compute_stats(k2, 16), compute_stats(k3, 16), compute_stats(k4, 16)
+                keys: ds (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  null sort order: a
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 348 Data size: 9684 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+                  auto parallelism: false
       Path -> Alias:
 #### A masked pattern was here ####
       Path -> Partition:
@@ -428,7 +467,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k0,k1,k2,k3,k4,k5
@@ -470,6 +509,40 @@ STAGE PLANS:
       Truncated Path -> Alias:
         /dst_union22_delta/ds=1 [null-subquery1:$hdt$_0-subquery1:dst_union22_delta]
 #### A masked pattern was here ####
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4
+          Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col0 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4
+            Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+#### A masked pattern was here ####
+              NumFilesPerFileSink: 1
+              Statistics: Num rows: 174 Data size: 4842 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4
+                    columns.types struct:struct:struct:struct:string
+                    escape.delim \
+                    hive.serialization.extend.additional.nesting.levels true
+                    serialization.escape.crlf true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              TotalFiles: 1
+              GatherStats: false
+              MultiFileSpray: false

   Stage: Stage-0
     Move Operator
@@ -502,6 +575,14 @@ STAGE PLANS:
     Stats-Aggr Operator
#### A masked pattern was here ####

+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: k1, k2, k3, k4
+          Column Types: string, string, string, string
+          Table: default.dst_union22
+          Is Table Level Stats: false
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
@@ -558,7 +639,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k1":"true","k2":"true","k3":"true","k4":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k1,k2,k3,k4
@@ -605,7 +686,7 @@ STAGE PLANS:
           partition values:
             ds 1
           properties:
-            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+            COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"k0":"true","k1":"true","k2":"true","k3":"true","k4":"true","k5":"true"}}
             bucket_count -1
             column.name.delimiter ,
             columns k0,k1,k2,k3,k4,k5
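[Note: union22 writes to a partitioned table, so the gather keys on the partition column ("keys: ds (type: string)") and the stats stage is marked "Is Table Level Stats: false"; COLUMN_STATS_ACCURATE in each partition's properties gains the per-column map. Assuming the partition spec this test writes (the constant '2' in the Select Operator above), the gathered stats can then be inspected per column:

    DESCRIBE FORMATTED dst_union22 PARTITION (ds='2') k1;
]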
diff --git a/ql/src/test/results/clientpositive/union28.q.out b/ql/src/test/results/clientpositive/union28.q.out
index c3789d0..0d9ff2f 100644
--- a/ql/src/test/results/clientpositive/union28.q.out
+++ b/ql/src/test/results/clientpositive/union28.q.out
@@ -39,6 +39,7 @@ STAGE DEPENDENCIES:
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-3 depends on stages: Stage-0
+  Stage-11 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -102,6 +103,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
@@ -117,6 +131,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
@@ -132,6 +159,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-8
     Conditional Operator
@@ -155,6 +208,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-11
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/union29.q.out b/ql/src/test/results/clientpositive/union29.q.out
index 87ba275..db92f3c 100644
--- a/ql/src/test/results/clientpositive/union29.q.out
+++ b/ql/src/test/results/clientpositive/union29.q.out
@@ -38,6 +38,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-10 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -67,6 +68,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -88,6 +102,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -109,6 +136,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-7
     Conditional Operator
@@ -132,6 +185,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator

+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
+
   Stage: Stage-3
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/union30.q.out b/ql/src/test/results/clientpositive/union30.q.out
index 26a27c8..d033a51 100644
--- a/ql/src/test/results/clientpositive/union30.q.out
+++ b/ql/src/test/results/clientpositive/union30.q.out
@@ -53,6 +53,7 @@ STAGE DEPENDENCIES:
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
   Stage-3 depends on stages: Stage-0
+  Stage-12 depends on stages: Stage-3
   Stage-4
   Stage-6
   Stage-7 depends on stages: Stage-6
@@ -116,6 +117,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
@@ -131,6 +145,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             Union
               Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
@@ -146,6 +173,19 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
           TableScan
             alias: src
             Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
@@ -167,6 +207,32 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.union_subq_union
+            Select Operator
+              expressions: _col0 (type: int), _col1 (type: string)
+              outputColumnNames: key, value
+              Statistics: Num rows: 1500 Data size: 15936 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

   Stage: Stage-8
     Conditional Operator
@@ -190,6 +256,13 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-12
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: int, string
+          Table: default.union_subq_union
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/union31.q.out b/ql/src/test/results/clientpositive/union31.q.out
index 4c26c7b..b04b1ea 100644
--- a/ql/src/test/results/clientpositive/union31.q.out
+++ b/ql/src/test/results/clientpositive/union31.q.out
@@ -72,9 +72,13 @@ STAGE DEPENDENCIES:
   Stage-2 is a root stage
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-3, Stage-4, Stage-6, Stage-7
   Stage-4 depends on stages: Stage-2
-  Stage-1 depends on stages: Stage-4
-  Stage-5 depends on stages: Stage-1
+  Stage-5 depends on stages: Stage-2
+  Stage-1 depends on stages: Stage-5
+  Stage-6 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-5

 STAGE PLANS:
   Stage: Stage-2
@@ -181,6 +185,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t3
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: key, cnt
+              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Move Operator
@@ -195,11 +214,47 @@ STAGE PLANS:
   Stage: Stage-3
     Stats-Aggr Operator

+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: key, cnt
+          Column Types: string, int
+          Table: default.t3
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: value, cnt
+          Column Types: string, int
+          Table: default.t4
+
   Stage: Stage-4
     Map Reduce
       Map Operator Tree:
           TableScan
             Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
              key expressions: _col0 (type: string)
              sort order: +
              Map-reduce partition columns: _col0 (type: string)
@@ -224,6 +279,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t4
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: value, cnt
+              Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(value, 16), compute_stats(cnt, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-1
     Move Operator
@@ -235,9 +305,31 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.t4

-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator

+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from (select * from t1
                      union all
@@ -340,12 +432,16 @@ insert overwrite table t6
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-2 is a root stage
-  Stage-3 depends on stages: Stage-2, Stage-6
+  Stage-3 depends on stages: Stage-2, Stage-8
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-5 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
-  Stage-6 is a root stage
+  Stage-6 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3
+  Stage-8 is a root stage

 STAGE PLANS:
   Stage: Stage-2
@@ -426,6 +522,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t5
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: c1, cnt
+              Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(cnt, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
          aggregations: sum(VALUE._col0)
          keys: KEY._col0 (type: string)
@@ -444,6 +555,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t6
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: c1, cnt
+              Statistics: Num rows: 5 Data size: 35 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(cnt, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe

   Stage: Stage-0
     Move Operator
@@ -458,6 +584,42 @@ STAGE PLANS:
   Stage: Stage-4
     Stats-Aggr Operator

+  Stage: Stage-9
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t5
+
+  Stage: Stage-10
+    Column Stats Work
+      Column Stats Desc:
+          Columns: c1, cnt
+          Column Types: string, int
+          Table: default.t6
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
   Stage: Stage-1
     Move Operator
       tables:
@@ -468,10 +630,32 @@ STAGE PLANS:
           serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: default.t6

-  Stage: Stage-5
+  Stage: Stage-6
     Stats-Aggr Operator

-  Stage: Stage-6
+  Stage: Stage-7
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order:
+              Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct), _col1 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
     Map Reduce
       Map Operator Tree:
          TableScan
@@ -654,8 +838,12 @@ STAGE DEPENDENCIES:
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
+  Stage-9 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-10 depends on stages: Stage-4, Stage-5, Stage-6, Stage-7
+  Stage-5 depends on stages: Stage-3
   Stage-1 depends on stages: Stage-3
-  Stage-5 depends on stages: Stage-1
+  Stage-6 depends on stages: Stage-1
+  Stage-7 depends on stages: Stage-3

 STAGE PLANS:
   Stage: Stage-2
@@ -752,6 +940,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: default.t7
+            Select Operator
+              expressions: _col0 (type: string), _col1 (type: int)
+              outputColumnNames: c1, cnt
+              Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: compute_stats(c1, 16), compute_stats(cnt, 16)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
         Group By Operator
          aggregations: count(1)
          keys: KEY._col0 (type: string)
@@ -770,6 +973,21 @@ STAGE PLANS:
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.t8 + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: c1, cnt + Statistics: Num rows: 5 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(c1, 16), compute_stats(cnt, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -784,6 +1002,42 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-9 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t7 + + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: c1, cnt + Column Types: string, int + Table: default.t8 + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Stage: Stage-1 Move Operator tables: @@ -794,9 +1048,31 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.t8 - Stage: Stage-5 + Stage: Stage-6 Stats-Aggr Operator + Stage: Stage-7 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: from ( select key as c1, count(1) as cnt from t1 group by key diff --git a/ql/src/test/results/clientpositive/union33.q.out b/ql/src/test/results/clientpositive/union33.q.out index 17aeecd..e9ad715 100644 --- a/ql/src/test/results/clientpositive/union33.q.out +++ b/ql/src/test/results/clientpositive/union33.q.out @@ -32,6 +32,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -124,6 +125,19 @@ STAGE PLANS: output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE @@ -135,6 +149,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -158,6 +198,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-4 Map Reduce Map Operator Tree: @@ -247,6 +294,7 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-11 depends on stages: Stage-4 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 @@ -330,6 +378,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: src Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE @@ -350,6 +411,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_src + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -373,6 +460,13 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-11 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.test_src + Stage: Stage-5 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union4.q.out b/ql/src/test/results/clientpositive/union4.q.out index 0821589..8524d72 100644 --- a/ql/src/test/results/clientpositive/union4.q.out +++ b/ql/src/test/results/clientpositive/union4.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -83,6 +84,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan Union Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE @@ -98,6 +112,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: int) + outputColumnNames: key, value + Statistics: Num rows: 2 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -121,6 +161,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, int + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union6.q.out b/ql/src/test/results/clientpositive/union6.q.out index 8448d86..53ffe5a 100644 --- a/ql/src/test/results/clientpositive/union6.q.out +++ b/ql/src/test/results/clientpositive/union6.q.out @@ -25,6 +25,7 @@ STAGE DEPENDENCIES: Stage-5 Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 Stage-3 depends on stages: Stage-0 + Stage-10 depends on stages: Stage-3 Stage-4 Stage-6 Stage-7 depends on stages: Stage-6 @@ -78,6 +79,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) TableScan alias: s2 Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE @@ -95,6 +109,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmptable + Select Operator + expressions: _col0 (type: string), _col1 (type: string) + outputColumnNames: key, value + Statistics: Num rows: 26 Data size: 7072 Basic stats: COMPLETE Column stats: PARTIAL + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: PARTIAL + value expressions: _col0 (type: struct), _col1 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column 
stats: PARTIAL + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: PARTIAL + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-8 Conditional Operator @@ -118,6 +158,13 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-10 + Column Stats Work + Column Stats Desc: + Columns: key, value + Column Types: string, string + Table: default.tmptable + Stage: Stage-4 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/union_lateralview.q.out b/ql/src/test/results/clientpositive/union_lateralview.q.out index 7b89e50..4c65003 100644 --- a/ql/src/test/results/clientpositive/union_lateralview.q.out +++ b/ql/src/test/results/clientpositive/union_lateralview.q.out @@ -48,6 +48,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -178,6 +180,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.test_union_lateral_view + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + outputColumnNames: key, arr_ele, value + Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, 16), compute_stats(arr_ele, 16), compute_stats(value, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -192,6 +209,35 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: key, arr_ele, value + Column Types: int, int, string + Table: default.test_union_lateral_view + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 1444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 1452 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: INSERT OVERWRITE TABLE test_union_lateral_view SELECT b.key, d.arr_ele, d.value FROM ( diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out index 9ea0b51..08ac9ee 100644 --- 
a/ql/src/test/results/clientpositive/union_stats.q.out +++ b/ql/src/test/results/clientpositive/union_stats.q.out @@ -487,7 +487,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 @@ -522,7 +522,7 @@ Retention: 0 #### A masked pattern was here #### Table Type: MANAGED_TABLE Table Parameters: - COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"} + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}} numFiles 1 numRows 1000 rawDataSize 10624 diff --git a/ql/src/test/results/clientpositive/vector_bucket.q.out b/ql/src/test/results/clientpositive/vector_bucket.q.out index 0eeb8a5..e4f030a 100644 --- a/ql/src/test/results/clientpositive/vector_bucket.q.out +++ b/ql/src/test/results/clientpositive/vector_bucket.q.out @@ -20,6 +20,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -75,6 +77,25 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.non_orc_table + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: a, b + Statistics: Num rows: 1 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(a, 16), compute_stats(b, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -89,6 +110,49 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-4 + Column Stats Work + Column Stats Desc: + Columns: a, b + Column Types: int, string + Table: default.non_orc_table + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + File Output Operator + 
compressed: false + Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: select a, b from non_orc_table order by a PREHOOK: type: QUERY PREHOOK: Input: default@non_orc_table diff --git a/ql/src/test/results/clientpositive/vector_char_4.q.out b/ql/src/test/results/clientpositive/vector_char_4.q.out index 1c58fd2..3464637 100644 --- a/ql/src/test/results/clientpositive/vector_char_4.q.out +++ b/ql/src/test/results/clientpositive/vector_char_4.q.out @@ -137,6 +137,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -148,38 +149,58 @@ STAGE PLANS: TableScan alias: vectortab2korc Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] Select Operator expressions: CAST( t AS CHAR(10) (type: char(10)), CAST( si AS CHAR(10) (type: char(10)), CAST( i AS CHAR(20) (type: char(20)), CAST( b AS CHAR(30) (type: char(30)), CAST( f AS CHAR(20) (type: char(20)), CAST( d AS CHAR(20) (type: char(20)), CAST( s AS CHAR(50) (type: char(50)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19] - selectExpressions: CastLongToChar(col 0, maxLength 10) -> 13:Char, CastLongToChar(col 1, maxLength 10) -> 14:Char, CastLongToChar(col 2, maxLength 20) -> 15:Char, CastLongToChar(col 3, maxLength 30) -> 16:Char, VectorUDFAdaptor(CAST( f AS CHAR(20)) -> 17:char(20), VectorUDFAdaptor(CAST( d AS CHAR(20)) -> 18:char(20), CastStringGroupToChar(col 8, maxLength 50) -> 19:Char Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe name: default.char_lazy_binary_columnar - Execution mode: vectorized + Select Operator + expressions: _col0 (type: char(10)), _col1 (type: char(10)), _col2 (type: char(20)), _col3 (type: char(30)), _col4 (type: char(20)), _col5 (type: char(20)), _col6 (type: char(50)) + outputColumnNames: ct, csi, ci, cb, cf, cd, cs + Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(ct, 16), compute_stats(csi, 16), compute_stats(ci, 16), compute_stats(cb, 16), compute_stats(cf, 16), compute_stats(cd, 16), compute_stats(cs, 16) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 
(type: struct), _col5 (type: struct), _col6 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: true - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -203,6 +224,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: ct, csi, ci, cb, cf, cd, cs + Column Types: char(10), char(10), char(20), char(30), char(20), char(20), char(50) + Table: default.char_lazy_binary_columnar + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_char_simple.q.out b/ql/src/test/results/clientpositive/vector_char_simple.q.out index c7b00fd..f89c1d3 100644 --- a/ql/src/test/results/clientpositive/vector_char_simple.q.out +++ b/ql/src/test/results/clientpositive/vector_char_simple.q.out @@ -212,6 +212,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 Stage-2 depends on stages: Stage-0 + Stage-4 depends on stages: Stage-2, Stage-3 + Stage-3 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -228,8 +230,8 @@ STAGE PLANS: Reduce Sink Vectorization: className: VectorReduceSinkOperator native: false - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Execution mode: vectorized Map Vectorization: enabled: true @@ -244,11 +246,34 @@ STAGE PLANS: enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false Stage: Stage-0 Stage: Stage-2 + Stage: Stage-4 + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Vectorization: + vectorOutput: false + native: false + PREHOOK: query: insert into table char_3 select cint from alltypesorc limit 10 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc diff --git a/ql/src/test/results/clientpositive/vector_groupby4.q.out b/ql/src/test/results/clientpositive/vector_groupby4.q.out index 799797d..9f82c1fb 100644 --- a/ql/src/test/results/clientpositive/vector_groupby4.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,96 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT substr(srcorc.key,1,1) GROUP BY substr(srcorc.key,1,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_groupby6.q.out b/ql/src/test/results/clientpositive/vector_groupby6.q.out index 6fee467..483b8c2 100644 --- a/ql/src/test/results/clientpositive/vector_groupby6.q.out +++ b/ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -35,6 +35,9 @@ STAGE DEPENDENCIES: Stage-2 depends on stages: Stage-1 Stage-0 depends on stages: Stage-2 Stage-3 depends on stages: Stage-0 + Stage-6 depends on stages: Stage-3, Stage-5 + Stage-4 depends on stages: Stage-2 + Stage-5 depends on stages: Stage-4 STAGE PLANS: Stage: Stage-1 @@ -143,6 +146,16 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.dest1 + Select Operator + expressions: _col0 (type: string) + outputColumnNames: c1 + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Stage: Stage-0 Move Operator @@ -157,6 +170,96 @@ STAGE PLANS: Stage: Stage-3 Stats-Aggr Operator + Stage: Stage-6 + Column Stats Work + Column Stats Desc: + Columns: c1 + Column Types: string + Table: default.dest1 + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + TableScan Vectorization: + native: true + projectedOutputColumns: [0] + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Reduce Sink Vectorization: + className: VectorReduceSinkOperator + native: false + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Statistics: Num rows: 250 Data size: 44000 Basic stats: COMPLETE Column stats: NONE + value expressions: c1 (type: string) + Execution mode: vectorized + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + groupByVectorOutput: true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0, 16) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + PREHOOK: query: FROM srcorc INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(srcorc.value,5,1) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/vector_multi_insert.q.out b/ql/src/test/results/clientpositive/vector_multi_insert.q.out index 4013cd4..c44a10f 100644 --- a/ql/src/test/results/clientpositive/vector_multi_insert.q.out +++ b/ql/src/test/results/clientpositive/vector_multi_insert.q.out @@ -85,23 +85,18 @@ STAGE DEPENDENCIES: Stage-6 Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 Stage-4 depends on stages: Stage-0 + Stage-14 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-15 depends on stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 + Stage-16 depends on 
stages: Stage-4, Stage-10, Stage-11, Stage-12, Stage-13 Stage-5 Stage-7 Stage-8 depends on stages: Stage-7 - Stage-15 depends on stages: Stage-3 , consists of Stage-12, Stage-11, Stage-13 - Stage-12 - Stage-1 depends on stages: Stage-12, Stage-11, Stage-14 + Stage-1 depends on stages: Stage-3 Stage-10 depends on stages: Stage-1 - Stage-11 - Stage-13 - Stage-14 depends on stages: Stage-13 - Stage-21 depends on stages: Stage-3 , consists of Stage-18, Stage-17, Stage-19 - Stage-18 - Stage-2 depends on stages: Stage-18, Stage-17, Stage-20 - Stage-16 depends on stages: Stage-2 - Stage-17 - Stage-19 - Stage-20 depends on stages: Stage-19 + Stage-11 depends on stages: Stage-3 + Stage-2 depends on stages: Stage-3 + Stage-12 depends on stages: Stage-2 + Stage-13 depends on stages: Stage-3 STAGE PLANS: Stage: Stage-3 @@ -125,6 +120,19 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) Filter Operator predicate: ((rn >= 100) and (rn < 1000)) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -140,6 +148,21 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn2 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (rn >= 1000) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE @@ -155,15 +178,44 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: rn + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(rn, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-9 Conditional Operator @@ -187,6 +239,27 @@ STAGE PLANS: Stage: Stage-4 Stats-Aggr Operator + Stage: Stage-14 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn1 + + Stage: Stage-15 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn2 + + Stage: Stage-16 + Column Stats Work + Column Stats Desc: + Columns: rn + Column Types: int + Table: default.orc_rn3 + Stage: Stage-5 Map Reduce Map Operator Tree: @@ -217,15 +290,6 @@ STAGE PLANS: hdfs directory: true #### A masked pattern was here #### - Stage: Stage-15 - Conditional Operator - - Stage: Stage-12 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - Stage: Stage-1 Move Operator tables: @@ -243,40 +307,33 @@ STAGE PLANS: Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-13 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn2 - - Stage: Stage-14 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### - - Stage: Stage-21 - Conditional Operator - - Stage: Stage-18 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic 
stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-2 Move Operator @@ -288,38 +345,40 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.orc_rn3 - Stage: Stage-16 + Stage: Stage-12 Stats-Aggr Operator - Stage: Stage-17 - Map Reduce - Map Operator Tree: - TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-19 + Stage: Stage-13 Map Reduce Map Operator Tree: TableScan - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: default.orc_rn3 - - Stage: Stage-20 - Move Operator - files: - hdfs directory: true -#### A masked pattern was here #### + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat + notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe PREHOOK: query: from orc1 a insert overwrite table orc_rn1 select a.* where a.rn < 100 diff --git a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out index 38f13da..6b98dd2 100644 --- a/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out +++ b/ql/src/test/results/clientpositive/vector_tablesample_rows.q.out @@ -96,6 +96,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -108,38 +109,58 @@ STAGE PLANS: alias: alltypesorc Row Limit Per Split: 1 Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE - TableScan Vectorization: - native: true - projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] Select Operator expressions: 17.29 (type: decimal(18,9)) outputColumnNames: 
_col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumns: [12] - selectExpressions: ConstantVectorExpression(val 17.29) -> 12:decimal(18,9) Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.decimal_2 - Execution mode: vectorized + Select Operator + expressions: 17.29 (type: decimal(18,9)) + outputColumnNames: t + Statistics: Num rows: 12288 Data size: 1376256 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(t, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 684 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - groupByVectorOutput: true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true + notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported + vectorized: false + Reduce Vectorization: + enabled: false + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true + enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + Group By Vectorization: + vectorOutput: false + native: false + projectedOutputColumns: null + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 688 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -163,6 +184,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: t + Column Types: decimal(18,9) + Table: default.decimal_2 + Stage: Stage-3 Merge File Operator Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out index 81d801c..601ed7a 100644 --- a/ql/src/test/results/clientpositive/vector_udf_character_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_character_length.q.out @@ -48,6 +48,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -71,7 +72,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + 
Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -95,6 +121,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out index c71cfef..08844a7 100644 --- a/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_octet_length.q.out @@ -31,6 +31,7 @@ STAGE DEPENDENCIES: Stage-4 Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 Stage-2 depends on stages: Stage-0 + Stage-8 depends on stages: Stage-2 Stage-3 Stage-5 Stage-6 depends on stages: Stage-5 @@ -54,7 +55,32 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest1 - Execution mode: vectorized + Select Operator + expressions: _col0 (type: int) + outputColumnNames: len + Statistics: Num rows: 25 Data size: 191 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(len, 16) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-7 Conditional Operator @@ -78,6 +104,13 @@ STAGE PLANS: Stage: Stage-2 Stats-Aggr Operator + Stage: Stage-8 + Column Stats Work + Column Stats Desc: + Columns: len + Column Types: int + Table: default.dest1 + Stage: Stage-3 Map Reduce Map Operator Tree: diff --git a/ql/src/test/results/clientpositive/vector_varchar_4.q.out b/ql/src/test/results/clientpositive/vector_varchar_4.q.out index 205c67a..45e1322 100644 
--- a/ql/src/test/results/clientpositive/vector_varchar_4.q.out
+++ b/ql/src/test/results/clientpositive/vector_varchar_4.q.out
@@ -137,6 +137,7 @@ STAGE DEPENDENCIES:
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
   Stage-2 depends on stages: Stage-0
+  Stage-8 depends on stages: Stage-2
   Stage-3
   Stage-5
   Stage-6 depends on stages: Stage-5
@@ -148,38 +149,58 @@ STAGE PLANS:
           TableScan
             alias: vectortab2korc
             Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
-            TableScan Vectorization:
-                native: true
-                projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
             Select Operator
               expressions: CAST( t AS varchar(10)) (type: varchar(10)), CAST( si AS varchar(10)) (type: varchar(10)), CAST( i AS varchar(20)) (type: varchar(20)), CAST( b AS varchar(30)) (type: varchar(30)), CAST( f AS varchar(20)) (type: varchar(20)), CAST( d AS varchar(20)) (type: varchar(20)), CAST( s AS varchar(50)) (type: varchar(50))
               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-              Select Vectorization:
-                  className: VectorSelectOperator
-                  native: true
-                  projectedOutputColumns: [13, 14, 15, 16, 17, 18, 19]
-                  selectExpressions: CastLongToVarChar(col 0, maxLength 10) -> 13:VarChar, CastLongToVarChar(col 1, maxLength 10) -> 14:VarChar, CastLongToVarChar(col 2, maxLength 20) -> 15:VarChar, CastLongToVarChar(col 3, maxLength 30) -> 16:VarChar, VectorUDFAdaptor(CAST( f AS varchar(20))) -> 17:varchar(20), VectorUDFAdaptor(CAST( d AS varchar(20))) -> 18:varchar(20), CastStringGroupToVarChar(col 8, maxLength 50) -> 19:VarChar
               Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
                 compressed: false
-                File Sink Vectorization:
-                    className: VectorFileSinkOperator
-                    native: false
                 Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
                 table:
                     input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
                     output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
                     serde: org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe
                     name: default.varchar_lazy_binary_columnar
-      Execution mode: vectorized
+              Select Operator
+                expressions: _col0 (type: varchar(10)), _col1 (type: varchar(10)), _col2 (type: varchar(20)), _col3 (type: varchar(30)), _col4 (type: varchar(20)), _col5 (type: varchar(20)), _col6 (type: varchar(50))
+                outputColumnNames: vt, vsi, vi, vb, vf, vd, vs
+                Statistics: Num rows: 2000 Data size: 918712 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: compute_stats(vt, 16), compute_stats(vsi, 16), compute_stats(vi, 16), compute_stats(vb, 16), compute_stats(vf, 16), compute_stats(vd, 16), compute_stats(vs, 16)
+                  mode: hash
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                  Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order: 
+                    Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
       Map Vectorization:
           enabled: true
          enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
-          groupByVectorOutput: true
           inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
-          allNative: false
-          usesVectorUDFAdaptor: true
-          vectorized: true
+          notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+          Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 3444 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-7
     Conditional Operator
@@ -203,6 +224,13 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-8
+    Column Stats Work
+      Column Stats Desc:
+          Columns: vt, vsi, vi, vb, vf, vd, vs
+          Column Types: varchar(10), varchar(10), varchar(20), varchar(30), varchar(20), varchar(20), varchar(50)
+          Table: default.varchar_lazy_binary_columnar
+
   Stage: Stage-3
     Merge File Operator
       Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out
index 1bed203..767d39e 100644
--- a/ql/src/test/results/clientpositive/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/vector_varchar_simple.q.out
@@ -276,6 +276,8 @@ STAGE DEPENDENCIES:
   Stage-1 is a root stage
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
+  Stage-4 depends on stages: Stage-2, Stage-3
+  Stage-3 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -306,10 +308,9 @@ STAGE PLANS:
                 Reduce Sink Vectorization:
                     className: VectorReduceSinkOperator
                     native: false
-                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false, No TopN IS false
+                    nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+                    nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
                 Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
-                TopN Hash Memory Usage: 0.1
                 value expressions: _col0 (type: int)
       Execution mode: vectorized
       Map Vectorization:
@@ -344,6 +345,25 @@ STAGE PLANS:
                 output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                 serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                 name: default.varchar_3
+          Select Operator
+            expressions: _col0 (type: varchar(25))
+            outputColumnNames: field
+            Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
+            Group By Operator
+              aggregations: compute_stats(field, 16)
+              Group By Vectorization:
+                  vectorOutput: false
+                  native: false
+                  projectedOutputColumns: null
+              mode: hash
+              outputColumnNames: _col0
+              Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                table:
+                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                    serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 
   Stage: Stage-0
     Move Operator
@@ -358,6 +378,49 @@ STAGE PLANS:
   Stage: Stage-2
     Stats-Aggr Operator
 
+  Stage: Stage-4
+    Column Stats Work
+      Column Stats Desc:
+          Columns: field
+          Column Types: varchar(25)
+          Table: default.varchar_3
+
+  Stage: Stage-3
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct)
+      Map Vectorization:
+          enabled: true
+          enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+          inputFileFormats: org.apache.hadoop.mapred.SequenceFileInputFormat
+          notVectorizedReason: Value expression for REDUCESINK operator: Data type struct of Column[_col0] not supported
+          vectorized: false
+      Reduce Vectorization:
+          enabled: false
+          enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true
+          enableConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0)
+          Group By Vectorization:
+              vectorOutput: false
+              native: false
+              projectedOutputColumns: null
+          mode: mergepartial
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: insert into table varchar_3 select cint from alltypesorc limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
diff --git a/ql/src/test/results/clientpositive/vectorized_context.q.out b/ql/src/test/results/clientpositive/vectorized_context.q.out
index dac9b6b..09efe09 100644
--- a/ql/src/test/results/clientpositive/vectorized_context.q.out
+++ b/ql/src/test/results/clientpositive/vectorized_context.q.out
@@ -109,29 +109,14 @@ STAGE PLANS:
   Stage: Stage-7
     Map Reduce Local Work
       Alias -> Map Local Tables:
-        $hdt$_0:store_sales
+        $hdt$_1:household_demographics
           Fetch Operator
             limit: -1
-        $hdt$_2:household_demographics
+        $hdt$_2:store
           Fetch Operator
             limit: -1
       Alias -> Map Local Operator Tree:
-        $hdt$_0:store_sales
-          TableScan
-            alias: store_sales
-            Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
-            Filter Operator
-              predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean)
-              Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
-              Select Operator
-                expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double)
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
-                HashTable Sink Operator
-                  keys:
-                    0 _col0 (type: int)
-                    1 _col0 (type: int)
-        $hdt$_2:household_demographics
+        $hdt$_1:household_demographics
           TableScan
             alias: household_demographics
             Statistics: Num rows: 6075 Data size: 24300 Basic stats: COMPLETE Column stats: NONE
@@ -146,10 +131,7 @@ STAGE PLANS:
                   keys:
                     0 _col1 (type: int)
                     1 _col0 (type: int)
-
-  Stage: Stage-5
-    Map Reduce
-      Map Operator Tree:
+        $hdt$_2:store
           TableScan
             alias: store
             Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
@@ -160,24 +142,42 @@ STAGE PLANS:
                 expressions: s_store_sk (type: int), s_city (type: string)
                 outputColumnNames: _col0, _col1
                 Statistics: Num rows: 6075 Data size: 615632 Basic stats: COMPLETE Column stats: NONE
+                HashTable Sink Operator
+                  keys:
+                    0 _col0 (type: int)
+                    1 _col0 (type: int)
+
+  Stage: Stage-5
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: store_sales
+            Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (ss_store_sk is not null and ss_hdemo_sk is not null) (type: boolean)
+              Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: ss_store_sk (type: int), ss_hdemo_sk (type: int), ss_net_profit (type: double)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 6075 Data size: 72736 Basic stats: COMPLETE Column stats: NONE
                 Map Join Operator
                   condition map:
                        Inner Join 0 to 1
                   keys:
-                    0 _col0 (type: int)
+                    0 _col1 (type: int)
                     1 _col0 (type: int)
-                  outputColumnNames: _col1, _col2, _col4
+                  outputColumnNames: _col0, _col2
                   Statistics: Num rows: 6682 Data size: 80009 Basic stats: COMPLETE Column stats: NONE
                   Map Join Operator
                     condition map:
                          Inner Join 0 to 1
                     keys:
-                      0 _col1 (type: int)
+                      0 _col0 (type: int)
                       1 _col0 (type: int)
-                    outputColumnNames: _col2, _col4
+                    outputColumnNames: _col2, _col5
                     Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      expressions: _col4 (type: string), _col2 (type: double)
+                      expressions: _col5 (type: string), _col2 (type: double)
                       outputColumnNames: _col0, _col1
                       Statistics: Num rows: 7350 Data size: 88009 Basic stats: COMPLETE Column stats: NONE
                       Limit